Skip to content

Commit

Permalink
Merge pull request #244 from tcezard/test_files
Browse files Browse the repository at this point in the history
GA4GHTT-261 - Add Test files for STR and new phasing notation
  • Loading branch information
tcezard authored Jun 13, 2024
2 parents 2ae356d + 94bd383 commit 004c7e5
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 0 deletions.
15 changes: 15 additions & 0 deletions test/input_files/v4.4/failed/failed_STR_001.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
##fileformat=VCFv4.4
##CauseOfFailure=cardinality is inconsistent between RN and RUS
##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the longest variant described in this record">
##INFO=<ID=SVLEN,Number=A,Type=Integer,Description="Length of structural variant">
##INFO=<ID=CN,Number=A,Type=Float,Description="Copy number of allele">
##INFO=<ID=RN,Number=A,Type=Integer,Description="Total number of repeat sequences in this allele">
##INFO=<ID=RUS,Number=.,Type=String,Description="Repeat unit sequence of the corresponding repeat sequence">
##INFO=<ID=RUC,Number=.,Type=Float,Description="Repeat unit count of corresponding repeat sequence">
##INFO=<ID=RB,Number=.,Type=Integer,Description="Total number of bases in the corresponding repeat sequence">
##INFO=<ID=RUB,Number=.,Type=Integer,Description="Number of bases in each individual repeat unit">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=PS,Number=1,Type=Integer,Description="Phase set">
##ALT=<ID=CNV:TR,Description="Tandem repeat determined based on DNA abundance">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample
chr1 130 . G <CNV:TR> . . END=130;SVLEN=1;CN=20;RUS=CAG,CA;RN=1;RB=60 GT:PS 1|0:100
16 changes: 16 additions & 0 deletions test/input_files/v4.4/failed/failed_STR_002.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
##fileformat=VCFv4.4
##CauseOfFailure=cardinality is inconsistent between RB and CIRB
##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the longest variant described in this record">
##INFO=<ID=SVLEN,Number=A,Type=Integer,Description="Length of structural variant">
##INFO=<ID=CN,Number=A,Type=Float,Description="Copy number of allele">
##INFO=<ID=RN,Number=A,Type=Integer,Description="Total number of repeat sequences in this allele">
##INFO=<ID=RUS,Number=.,Type=String,Description="Repeat unit sequence of the corresponding repeat sequence">
##INFO=<ID=RUC,Number=.,Type=Float,Description="Repeat unit count of corresponding repeat sequence">
##INFO=<ID=RB,Number=.,Type=Integer,Description="Total number of bases in the corresponding repeat sequence">
##INFO=<ID=RUB,Number=.,Type=Integer,Description="Number of bases in each individual repeat unit">
##INFO=<ID=CIRB,Number=.,Type=Integer,Description="Confidence interval around RB">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=PS,Number=1,Type=Integer,Description="Phase set">
##ALT=<ID=CNV:TR,Description="Tandem repeat determined based on DNA abundance">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample
chr1 130 . G <CNV:TR> . . END=130;SVLEN=1;CN=20;RUS=CAG;RN=1;RB=60;CIRB=-2 GT:PS 1|0:100
16 changes: 16 additions & 0 deletions test/input_files/v4.4/passed/passed_STR.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
##fileformat=VCFv4.4
##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the longest variant described in this record">
##INFO=<ID=SVLEN,Number=A,Type=Integer,Description="Length of structural variant">
##INFO=<ID=CN,Number=A,Type=Float,Description="Copy number of allele">
##INFO=<ID=RN,Number=A,Type=Integer,Description="Total number of repeat sequences in this allele">
##INFO=<ID=RUS,Number=.,Type=String,Description="Repeat unit sequence of the corresponding repeat sequence">
##INFO=<ID=RUL,Number=.,Type=Integer,Description="Repeat unit length of the corresponding repeat sequence">
##INFO=<ID=RUC,Number=.,Type=Float,Description="Repeat unit count of corresponding repeat sequence">
##INFO=<ID=RB,Number=.,Type=Integer,Description="Total number of bases in the corresponding repeat sequence">
##INFO=<ID=RUB,Number=.,Type=Integer,Description="Number of bases in each individual repeat unit">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=PS,Number=1,Type=Integer,Description="Phase set">
##ALT=<ID=CNV:TR,Description="Tandem repeat determined based on DNA abundance">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample
chr1 100 cnv_notation T <CNV:TR>,<CNV:TR> . . END=130;SVLEN=30,30;CN=3,.5;RUS=CAG,CAG;RB=90,15 GT:PS:CN 1|2:100:3.9666
chr1 130 . G <CNV:TR> . . END=130;SVLEN=1;CN=20;RUS=CAG;RN=1;RB=60 GT:PS 1|0:100
2 changes: 2 additions & 0 deletions test/input_files/v4.4/passed/passed_body_format.vcf
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,5 @@
1 300 rs180734498 C T 100 PASS AC=4 GT:DS:GL 0|0:0.050:-0.13,-0.58,-3.62 0|1:1.000:-2.45,-0.00,-5.00
1 400 rs182711216 C T 100 PASS AC=4 GT:G_S:GL 0|0:0.000:-0.18,-0.48,-2.49 0|0:0.000:-0.20,-0.44,-2.06
1 500 rs182711216 C T 100 PASS AC=4 GT:G%3AS:GL 0|0:0.000:-0.18,-0.48,-2.49 0|0:0.000:-0.20,-0.44,-2.06
1 600 rs182711216 C T 100 PASS AC=4 GT:G%3AS:GL |0|0:0.000:-0.18,-0.48,-2.49 |0|0:0.000:-0.20,-0.44,-2.06
1 700 rs182711216 C T 100 PASS AC=4 GT:G%3AS:GL /0/0:0.000:-0.18,-0.48,-2.49 |0|0:0.000:-0.20,-0.44,-2.06

0 comments on commit 004c7e5

Please sign in to comment.