diff --git a/Makefile b/Makefile index 7013cd594..1005c084a 100644 --- a/Makefile +++ b/Makefile @@ -104,7 +104,7 @@ endif include config.mk -PACKAGE_VERSION = 1.18 +PACKAGE_VERSION = 1.19 # If building from a Git repository, replace $(PACKAGE_VERSION) with the Git # description of the working tree: either a release tag with the same value @@ -233,8 +233,11 @@ abuf_h = abuf.h $(htslib_vcf_h) dbuf_h = dbuf.h $(htslib_vcf_h) bam2bcf_h = bam2bcf.h $(htslib_hts_h) $(htslib_vcf_h) bam_sample_h = bam_sample.h $(htslib_sam_h) +cigar_state_h = cigar_state.h $(htslib_hts_h) $(htslib_sam_h) +read_consensus_h = read_consensus.h $(htslib_hts_h) $(htslib_sam_h) +str_finder_h = str_finder.h utlist.h -str_finder.o: str_finder.h utlist.h +str_finder.o: str_finder.c $(str_finder_h) utlist.h main.o: main.c $(htslib_hts_h) config.h version.h $(bcftools_h) vcfannotate.o: vcfannotate.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kseq_h) $(htslib_khash_str2int_h) $(bcftools_h) vcmp.h $(filter_h) $(convert_h) $(smpl_ilist_h) regidx.h $(htslib_khash_h) $(dbuf_h) vcfplugin.o: vcfplugin.c config.h $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kseq_h) $(htslib_khash_str2int_h) $(bcftools_h) vcmp.h $(filter_h) @@ -242,11 +245,11 @@ vcfcall.o: vcfcall.c $(htslib_vcf_h) $(htslib_kfunc_h) $(htslib_synced_bcf_reade vcfconcat.o: vcfconcat.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kseq_h) $(htslib_bgzf_h) $(htslib_tbx_h) $(htslib_thread_pool_h) $(bcftools_h) vcfconvert.o: vcfconvert.c $(htslib_faidx_h) $(htslib_vcf_h) $(htslib_bgzf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_kseq_h) $(bcftools_h) $(filter_h) $(convert_h) $(tsv2vcf_h) vcffilter.o: vcffilter.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(bcftools_h) $(filter_h) rbuf.h regidx.h -vcfgtcheck.o: vcfgtcheck.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_kbitset_h) $(htslib_hts_os_h) $(bcftools_h) extsort.h +vcfgtcheck.o: 
vcfgtcheck.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_kbitset_h) $(htslib_hts_os_h) $(htslib_bgzf_h) $(bcftools_h) extsort.h filter.h vcfindex.o: vcfindex.c $(htslib_vcf_h) $(htslib_tbx_h) $(htslib_kstring_h) $(htslib_bgzf_h) $(bcftools_h) vcfisec.o: vcfisec.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_hts_os_h) $(bcftools_h) $(filter_h) -vcfmerge.o: vcfmerge.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_faidx_h) regidx.h $(bcftools_h) vcmp.h $(htslib_khash_h) -vcfnorm.o: vcfnorm.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_faidx_h) $(htslib_khash_str2int_h) $(bcftools_h) rbuf.h abuf.h gff.h +vcfmerge.o: vcfmerge.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_faidx_h) $(htslib_kbitset_h) $(htslib_hts_endian_h) $(bcftools_h) regidx.h vcmp.h $(htslib_khash_h) $(htslib_kbitset_h) +vcfnorm.o: vcfnorm.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_faidx_h) $(htslib_khash_str2int_h) $(bcftools_h) rbuf.h abuf.h gff.h regidx.h vcfquery.o: vcfquery.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_khash_str2int_h) $(htslib_vcfutils_h) $(bcftools_h) $(filter_h) $(convert_h) $(smpl_ilist_h) vcfroh.o: vcfroh.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kstring_h) $(htslib_kseq_h) $(htslib_bgzf_h) $(bcftools_h) HMM.h $(smpl_ilist_h) $(filter_h) vcfcnv.o: vcfcnv.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kstring_h) $(htslib_kfunc_h) $(htslib_khash_str2int_h) $(bcftools_h) HMM.h rbuf.h @@ -254,7 +257,7 @@ vcfhead.o: vcfhead.c $(htslib_kstring_h) $(htslib_vcf_h) $(bcftools_h) vcfsom.o: vcfsom.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_hts_os_h) $(bcftools_h) vcfsort.o: vcfsort.c $(htslib_vcf_h) $(htslib_kstring_h) $(htslib_hts_os_h) kheap.h $(bcftools_h) vcfstats.o: vcfstats.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_faidx_h) 
$(bcftools_h) $(filter_h) bin.h dist.h -vcfview.o: vcfview.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(bcftools_h) $(filter_h) $(htslib_khash_str2int_h) +vcfview.o: vcfview.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(bcftools_h) $(filter_h) $(htslib_khash_str2int_h) $(htslib_kbitset_h) reheader.o: reheader.c $(htslib_vcf_h) $(htslib_bgzf_h) $(htslib_tbx_h) $(htslib_kseq_h) $(htslib_thread_pool_h) $(htslib_faidx_h) $(htslib_khash_str2int_h) $(bcftools_h) $(khash_str2str_h) tabix.o: tabix.c $(htslib_bgzf_h) $(htslib_tbx_h) ccall.o: ccall.c $(htslib_kfunc_h) $(call_h) kmin.h $(prob1_h) @@ -275,12 +278,12 @@ bin.o: bin.c $(bcftools_h) bin.h dist.o: dist.c dist.h cols.o: cols.c cols.h regidx.o: regidx.c $(htslib_hts_h) $(htslib_kstring_h) $(htslib_kseq_h) $(htslib_khash_str2int_h) regidx.h -consensus.o: consensus.c $(htslib_vcf_h) $(htslib_kstring_h) $(htslib_synced_bcf_reader_h) $(htslib_kseq_h) $(htslib_bgzf_h) regidx.h $(bcftools_h) rbuf.h $(filter_h) +consensus.o: consensus.c $(htslib_vcf_h) $(htslib_kstring_h) $(htslib_synced_bcf_reader_h) $(htslib_kseq_h) $(htslib_bgzf_h) regidx.h $(bcftools_h) rbuf.h $(filter_h) $(smpl_ilist_h) mpileup.o: mpileup.c $(htslib_sam_h) $(htslib_faidx_h) $(htslib_kstring_h) $(htslib_khash_str2int_h) $(htslib_hts_os_h) regidx.h $(bcftools_h) $(bam2bcf_h) $(bam_sample_h) $(gvcf_h) bam2bcf.o: bam2bcf.c $(htslib_hts_h) $(htslib_sam_h) $(htslib_kstring_h) $(htslib_kfunc_h) $(bam2bcf_h) mw.h -bam2bcf_indel.o: bam2bcf_indel.c $(htslib_hts_h) $(htslib_sam_h) $(htslib_khash_str2int_h) $(bam2bcf_h) $(htslib_ksort_h) str_finder.h -bam2bcf_iaux.o: bam2bcf_iaux.c $(htslib_hts_h) $(htslib_sam_h) $(htslib_khash_str2int_h) $(bam2bcf_h) $(htslib_ksort_h) str_finder.h read_consensus.h cigar_state.h -read_consensus.o: read_consensus.c read_consensus.h cigar_state.h $(htslib_hts_h) $(htslib_sam_h) +bam2bcf_indel.o: bam2bcf_indel.c $(htslib_hts_h) $(htslib_sam_h) $(htslib_khash_str2int_h) $(bam2bcf_h) 
$(htslib_ksort_h) $(str_finder_h) +bam2bcf_iaux.o: bam2bcf_iaux.c $(htslib_hts_h) $(htslib_sam_h) $(htslib_khash_str2int_h) $(bcftools_h) $(bam2bcf_h) $(htslib_ksort_h) $(read_consensus_h) $(cigar_state_h) +read_consensus.o: read_consensus.c $(read_consensus_h) $(cigar_state_h) $(bcftools_h) kheap.h bam_sample.o: bam_sample.c $(htslib_hts_h) $(htslib_kstring_h) $(htslib_khash_str2int_h) $(khash_str2str_h) $(bam_sample_h) $(bcftools_h) version.o: version.h version.c hclust.o: hclust.c $(htslib_hts_h) $(htslib_kstring_h) $(bcftools_h) hclust.h @@ -289,8 +292,8 @@ vcfbuf.o: vcfbuf.c $(htslib_vcf_h) $(htslib_vcfutils_h) $(htslib_hts_os_h) $(bcf abuf.o: abuf.c $(htslib_vcf_h) $(bcftools_h) rbuf.h abuf.h extsort.o: extsort.c $(bcftools_h) extsort.h kheap.h smpl_ilist.o: smpl_ilist.c $(bcftools_h) $(smpl_ilist_h) -gff.o: gff.c gff.h regidx.h -csq.o: csq.c $(htslib_hts_h) $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_khash_h) $(htslib_khash_str2int_h) $(htslib_kseq_h) $(htslib_faidx_h) $(bcftools_h) $(filter_h) regidx.h kheap.h $(smpl_ilist_h) rbuf.h +gff.o: gff.c $(htslib_hts_h) $(htslib_khash_h) $(htslib_khash_str2int_h) $(htslib_kseq_h) $(htslib_bgzf_h) $(bcftools_h) gff.h regidx.h +csq.o: csq.c $(htslib_hts_h) $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_khash_h) $(htslib_khash_str2int_h) $(htslib_kseq_h) $(htslib_faidx_h) $(htslib_bgzf_h) $(bcftools_h) $(filter_h) regidx.h kheap.h $(smpl_ilist_h) rbuf.h gff.h # test programs @@ -300,7 +303,7 @@ csq.o: csq.c $(htslib_hts_h) $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(hts # # If using MSYS, avoid poor shell expansion via: # MSYS2_ARG_CONV_EXCL="*" make check -check test-no-plugins: $(PROGRAMS) $(TEST_PROGRAMS) $(BGZIP) $(TABIX) +check-no-plugins test-no-plugins: $(PROGRAMS) $(TEST_PROGRAMS) $(BGZIP) $(TABIX) ./test/test-rbuf ./test/test-regidx REF_PATH=: ./test/test.pl --exec bgzip=$(BGZIP) --exec tabix=$(TABIX) --htsdir=$(HTSDIR) $${TEST_OPTS:-} diff --git a/NEWS b/NEWS index 62c4699ac..90c43022b 
100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,127 @@ -## Release 1.18 (25th July 2023) +## Release 1.19 (12th December 2023) + + +Changes affecting the whole of bcftools, or multiple commands: + +* Filtering expressions can be given a file with list of strings to match, this + was previously possible only for the ID column. For example + + ID=@file .. selects lines with ID present in the file + INFO/TAG=@file.txt .. selects lines where TAG has a string value listed in the file + INFO/TAG!=@file.txt .. TAG must not have a string value listed in the file + + Allow to query REF,ALT columns directly, for example + + -e 'REF="N"' + + +Changes affecting specific commands: + +* bcftools annotate + + - Fix `bcftools annotate --mark-sites`, VCF sites overlapping regions in a BED file + were not annotated (#1989) + + - Add flexibility to FILTER column transfers and allow transfers within the same file, + across files, and in combination. For examples see + http://samtools.github.io/bcftools/howtos/annotate.html#transfer_filter_to_info + +* bcftools call + + - Output MIN_DP rather than MinDP in gVCF mode + + - New `-*, --keep-unseen-allele` option to output the unobserved allele <*>, + intended for gVCF. + +* bcftools head + + - New `-s, --samples` option to include the #CHROM header line with samples. + +* bcftools gtcheck + + - Add output options `-o, --output` and `-O, --output-type` + + - Add filtering options `-i, --include` and `-e, --exclude` + + - Rename the short option `-e, --error-probability` from lower case to upper + case `-E, --error-probability` + + - Changes to the output format, replace the DC section with DCv2: + + - adds a new column for the number of matching genotypes + + - The --error-probability is newly interpreted as the probability of erroneous + allele rather than genotype. In other words, the calculation of the discordance + score now considers the probability of genotyping error to be different + for HOM and HET genotypes, i.e. 
P(0/1|dsg=0) > P(1/1|dsg=0). + + - fixes in HWE score calculation plus output average HWE score rather + than absolute HWE score + + - better description of fields +* bcftools merge + + - Add `-m` modifiers to suppress the output of the unseen allele <*> or <NON_REF> + at variant sites (e.g. `-m both,*`) or all sites (e.g. `-m both,**`) + +* bcftools mpileup + + - Output MIN_DP rather than MinDP in gVCF mode + +* bcftools norm + + - Add the number of joined lines to the summary output, for example + + Lines total/split/joined/realigned/skipped: 6/0/3/0/0 + + - Allow combining -m and -a with --old-rec-tag (#2020) + + - Symbolic alleles caused norm to expand REF to the full length of the deletion. + This was not intended and problematic for long deletions, the REF allele should list + one base only (#2029) + +* bcftools query + + - Add new `-N, --disable-automatic-newline` option for pre-1.18 query formatting behavior + when newline would not be added when missing + + - Make the automatic addition of the newline character more predictable and, + when missing, always put it at the end of the expression. In version 1.18 it could + be added at the end of the expression (for per-site expressions) or inside the square + brackets (for per-sample expressions). The new behavior is: + + - if the formatting expression contains a newline character, do nothing + - if there is no newline character and -N, --disable-automatic-newline is given, do nothing + - if there is no newline character and -N is not given, insert newline at the end of the expression + + See #1969 for details + + - Add new `-F, --print-filtered` option to output a default string for samples that would otherwise + be filtered by `-i/-e` expressions. + + - Include sample name in the output header with `-H` whenever it makes sense (#1992) + +* bcftools +split-vep + + - Fix on the fly filtering involving numeric subfields, e.g. 
`-i 'MAX_AF<0.001'` (#2039) + + - Interpret default column type names (--columns-types) as entire strings, rather than + substrings to avoid unexpected spurious matches (i.e. internally add ^ and $ to all + field names) + +* bcftools +trio-dnm2 + + - Do not flag paternal genotyping errors as de novo mutations. Specifically, when father's + chrX genotype is 0/1 and mother's 0/0, 0/1 in the child will not be marked as DNM. + +* bcftools view + + - Add new `-A, --trim-unseen-allele` option to remove the unseen allele <*> or <NON_REF> + at variant sites (`-A`) or all sites (`-AA`) + + +## Release 1.18 (25th July 2023) Changes affecting the whole of bcftools, or multiple commands: diff --git a/abuf.c b/abuf.c index 7958cf570..ea5e1b373 100644 --- a/abuf.c +++ b/abuf.c @@ -411,12 +411,12 @@ static void _split_table_set_info(abuf_t *buf, bcf_info_t *info, merge_rule_t mo buf->tmp2 = dst.s; ret = bcf_update_info(buf->out_hdr, out, tag, buf->tmp2, dst.l, type); } - if ( ret!=0 ) error("An error occurred while updating INFO/%s\n",tag); + if ( ret!=0 ) error("An error occurred while updating INFO/%s (errcode=%d)\n",tag,ret); } } static void _split_table_set_history(abuf_t *buf) { - int i,j; + int i,j,ret; bcf1_t *rec = buf->split.rec; buf->tmps.l = 0; ksprintf(&buf->tmps,"%s|%"PRIhts_pos"|%s|",bcf_seqname(buf->hdr,rec),rec->pos+1,rec->d.allele[0]); @@ -441,8 +441,8 @@ static void _split_table_set_history(abuf_t *buf) kputc(',',&buf->tmps); } buf->tmps.s[--buf->tmps.l] = 0; - if ( (bcf_update_info_string(buf->out_hdr, out, buf->split.info_tag, buf->tmps.s))!=0 ) - error("An error occurred while updating INFO/%s\n",buf->split.info_tag); + if ( (ret=bcf_update_info_string(buf->out_hdr, out, buf->split.info_tag, buf->tmps.s))!=0 ) + error("An error occurred while updating INFO/%s (errcode=%d)\n",buf->split.info_tag,ret); } } static void _split_table_set_gt(abuf_t *buf) @@ -668,7 +668,7 @@ static void _split_table_set_format(abuf_t *buf, bcf_fmt_t *fmt, merge_rule_t mo #undef BRANCH ret = 
bcf_update_format(buf->out_hdr, out, tag, buf->tmp2, 3*(1+star_allele)*nsmpl, type); } - if ( ret!=0 ) error("An error occurred while updating FORMAT/%s\n",tag); + if ( ret!=0 ) error("An error occurred while updating FORMAT/%s (errcode=%d)\n",tag,ret); } } static inline int _is_acgtn(char *seq) diff --git a/bcftools.h b/bcftools.h index bba71e3b6..328f19d7e 100644 --- a/bcftools.h +++ b/bcftools.h @@ -141,4 +141,16 @@ static inline int bcf_double_test(double d, uint64_t value) #define bcf_double_is_missing(x) bcf_double_test((x),bcf_double_missing) #define bcf_double_is_missing_or_vector_end(x) (bcf_double_test((x),bcf_double_missing) || bcf_double_test((x),bcf_double_vector_end)) +static inline int get_unseen_allele(bcf1_t *line) +{ + int i; + for (i=1; in_allele; i++) + { + if ( !strcmp(line->d.allele[i],"<*>") ) return i; + if ( !strcmp(line->d.allele[i],"") ) return i; + if ( !strcmp(line->d.allele[i],"") ) return i; + } + return 0; +} + #endif diff --git a/call.h b/call.h index 16bf0b68e..090ac019a 100644 --- a/call.h +++ b/call.h @@ -33,7 +33,7 @@ THE SOFTWARE. */ #define CALL_VARONLY (1<<1) #define CALL_CONSTR_TRIO (1<<2) #define CALL_CONSTR_ALLELES (1<<3) -// +#define CALL_KEEP_UNSEEN (1<<4) #define CALL_FMT_PV4 (1<<5) #define CALL_FMT_GQ (1<<6) #define CALL_FMT_GP (1<<7) @@ -125,8 +125,7 @@ call_t; void error(const char *format, ...); /* - * call() - return -1 value on critical error; -2 to skip the site; or the number of non-reference - * alleles on success. 
+ * call() - return -1 value on critical error; -2 to skip the site; or the number of alleles on success */ int mcall(call_t *call, bcf1_t *rec); // multiallic and rare-variant calling model int ccall(call_t *call, bcf1_t *rec); // the default consensus calling model diff --git a/convert.c b/convert.c index 07ff01862..d418dfa27 100644 --- a/convert.c +++ b/convert.c @@ -104,9 +104,11 @@ struct _convert_t char *undef_info_tag; void *used_tags_hash; char **used_tags_list; + char *print_filtered; int nused_tags; int allow_undef_tags; int force_newline; + int header_samples; uint8_t **subset_samples; }; @@ -1550,6 +1552,7 @@ void convert_destroy(convert_t *convert) free(convert->used_tags_list); } khash_str2int_destroy(convert->used_tags_hash); + free(convert->print_filtered); free(convert->fmt); free(convert->undef_info_tag); free(convert->dat); @@ -1562,6 +1565,7 @@ void convert_destroy(convert_t *convert) int convert_header(convert_t *convert, kstring_t *str) { int i, icol = 0, l_ori = str->l; + bcf_hdr_t *hdr = convert->header; // Supress the header output if LINE is present for (i=0; infmt; i++) @@ -1585,6 +1589,7 @@ int convert_header(convert_t *convert, kstring_t *str) while ( convert->fmt[j].is_gt_field ) j++; for (js=0; jsnsamples; js++) { + int ks = convert->samples[js]; for (k=i; kfmt[k].type == T_SEP ) @@ -1600,10 +1605,21 @@ int convert_header(convert_t *convert, kstring_t *str) } } } + else if ( convert->header_samples ) + ksprintf(str, "[%d]%s:%s", ++icol, hdr->samples[ks], convert->fmt[k].key); else ksprintf(str, "[%d]%s", ++icol, convert->fmt[k].key); } - if ( has_fmt_newline ) break; + if ( has_fmt_newline ) + { + if ( !convert->header_samples ) break; + + // this is unfortunate: the formatting expression breaks the per-sample output into separate lines, + // therefore including a sample name in the header makes no sense anymore + convert->header_samples = 0; + str->l = l_ori; + return convert_header(convert, str); + } } i = j-1; continue; @@ -1653,7 
+1669,17 @@ int convert_line(convert_t *convert, bcf1_t *line, kstring_t *str) { // Skip samples when filtering was requested int ks = convert->samples[js]; - if ( convert->subset_samples && *convert->subset_samples && !(*convert->subset_samples)[ks] ) continue; + if ( convert->subset_samples && *convert->subset_samples && !(*convert->subset_samples)[ks] ) + { + if ( !convert->print_filtered ) continue; + + for (k=i; kfmt[k].type==T_SEP ) + convert->fmt[k].handler(convert, line, &convert->fmt[k], ks, str); + else + kputs(convert->print_filtered, str); + continue; + } // Here comes a hack designed for TBCSQ. When running on large files, // such as 1000GP, there are too many empty fields in the output and @@ -1709,29 +1735,18 @@ static void force_newline_(convert_t *convert) } if ( has_newline ) return; - // A newline is not present, force it. But where to add it? - // Consider - // -f'%CHROM[ %SAMPLE]\n' - // vs - // -f'[%CHROM %SAMPLE\n]' - for (i=0; infmt; i++) - if ( !convert->fmt[i].is_gt_field && convert->fmt[i].key ) break; - - if ( i < convert->nfmt ) - register_tag(convert, "\n", 0, T_SEP); // the first case - else - { - // the second case - i = convert->nfmt - 1; - if ( !convert->fmt[i].key ) - { - convert->fmt[i].key = strdup("\n"); - convert->fmt[i].is_gt_field = 1; - register_tag(convert, NULL, 0, T_SEP); - } - else - register_tag(convert, "\n", 1, T_SEP); - } + // A newline is not present, force it. But where to add it? Always at the end. 
+ // + // Briefly, in 1.18, we considered the following automatic behavior, which for + // per-site output it would add it at the end of the expression and for per-sample + // output it would add it inside the square brackets: + // -f'%CHROM[ %SAMPLE]\n' + // -f'[%CHROM %SAMPLE\n]' + // + // However, this is an annoyance for users, as it is not entirely clear what + // will happen unless one understands the internals well (#1969) + + register_tag(convert, "\n", 0, T_SEP); } int convert_set_option(convert_t *convert, enum convert_option opt, ...) @@ -1748,6 +1763,12 @@ int convert_set_option(convert_t *convert, enum convert_option opt, ...) case subset_samples: convert->subset_samples = va_arg(args, uint8_t**); break; + case header_samples: + convert->header_samples = va_arg(args, int); + break; + case print_filtered: + convert->print_filtered = strdup(va_arg(args, char*)); + break; case force_newline: convert->force_newline = va_arg(args, int); if ( convert->force_newline ) force_newline_(convert); diff --git a/convert.h b/convert.h index 062607093..188b38124 100644 --- a/convert.h +++ b/convert.h @@ -30,9 +30,11 @@ THE SOFTWARE. */ typedef struct _convert_t convert_t; enum convert_option { - allow_undef_tags, - subset_samples, - force_newline, + allow_undef_tags, // see `bcftools query --allow-undef-tags`, throws an error if tag is not defined otherwise + subset_samples, // in bracketed expressions (e.g. [ %GT]) consider only marked samples + header_samples, // include sample name in bracketed tags (e.g. 
SAMPLE1:GT SAMPLE2:GT for [ %GT]) + force_newline, // automatically insert a newline when not part of the formatting expression + print_filtered, // print the provided string instead of discarding samples not included in subset_samples }; convert_t *convert_init(bcf_hdr_t *hdr, int *samples, int nsamples, const char *str); diff --git a/doc/bcftools.1 b/doc/bcftools.1 index c940065fb..83d79e574 100644 --- a/doc/bcftools.1 +++ b/doc/bcftools.1 @@ -2,12 +2,12 @@ .\" Title: bcftools .\" Author: [see the "AUTHOR(S)" section] .\" Generator: Asciidoctor 2.0.16.dev -.\" Date: 2023-07-25 +.\" Date: 2023-12-12 .\" Manual: \ \& .\" Source: \ \& .\" Language: English .\" -.TH "BCFTOOLS" "1" "2023-07-25" "\ \&" "\ \&" +.TH "BCFTOOLS" "1" "2023-12-12" "\ \&" "\ \&" .ie \n(.g .ds Aq \(aq .el .ds Aq ' .ss \n[.ss] 0 @@ -51,7 +51,7 @@ standard input (stdin) and outputs to the standard output (stdout). Several commands can thus be combined with Unix pipes. .SS "VERSION" .sp -This manual page was last updated \fB2023\-07\-25\fP and refers to bcftools git version \fB1.18\fP. +This manual page was last updated \fB2023\-12\-12\fP and refers to bcftools git version \fB1.19\fP. .SS "BCF1" .sp The obsolete BCF1 format output by versions of samtools <= 0.1.19 is \fBnot\fP @@ -938,7 +938,7 @@ Automatically index the output file # that INFO/END is already present in the VCF header. 
bcftools annotate \-a annots.tab.gz \-c CHROM,POS,~ID,REF,ALT,INFO/END input.vcf - # For more examples see http://samtools.github.io/bcftools/howtos/annotate.html + # For (many) more examples see http://samtools.github.io/bcftools/howtos/annotate.html .fam .fi .if n .RE @@ -1054,6 +1054,11 @@ output all alternate alleles present in the alignments even if they do not appear in any of the genotypes .RE .sp +\fB\-\fP*\fB, \-\-keep\-unseen\-allele\fP +.RS 4 +keep the unobserved allele <*> or , useful mainly for gVCF output +.RE +.sp \fB\-f, \-\-format\-fields\fP \fIlist\fP .RS 4 comma\-separated list of FORMAT fields to output for each sample. Currently @@ -2495,7 +2500,13 @@ in\-memory sorting and DIR is the temporary directory for external sorting. This Stop after first record to estimate required time. .RE .sp -\fB\-e, \-\-error\-probability\fP \fIINT\fP +\fB\-e, \-\-exclude\fP [\fIqry\fP|\fIgt\fP]:\*(AqEXPRESSION\*(Aq +.RS 4 +Exclude sites from query file (\fIqry:\fP) or genotype file (\fIgt:\fP) for which \fIEXPRESSION\fP is true. +For valid expressions see \fBEXPRESSIONS\fP. +.RE +.sp +\fB\-E, \-\-error\-probability\fP \fIINT\fP .RS 4 Interpret genotypes and genotype likelihoods probabilistically. The value of \fIINT\fP represents genotype quality when GT tag is used (e.g. Q=30 represents one error in 1,000 genotypes and @@ -2505,13 +2516,20 @@ non\-zero integer can be provided). .br \~ .br -If \fB\-e\fP is set to 0, the discordance score can be interpreted as the number of mismatching genotypes, +If \fB\-E\fP is set to 0, the discordance score can be interpreted as the number of mismatching genotypes, but only in the GT\-vs\-GT matching mode. See the \fB\-u, \-\-use\fP option below for additional notes and caveats. \~ .br \~ .br -If performance is an issue, set \fB\-e 0\fP for faster run times but less accurate results. +If performance is an issue, set \fB\-E 0\fP for faster run times but less accurate results. 
+\~ +.br +\~ +.br +Note that in previous versions of bcftools (\(lA1.18), this option used to be a smaller case \fB\-e\fP. It +changed to make room for the filtering option \fB\-e, \-\-exclude\fP to stay consistent across other +commands. .RE .sp \fB\-g, \-\-genotypes\fP \fIFILE\fP @@ -2524,6 +2542,12 @@ VCF/BCF file with reference genotypes to compare against Homozygous genotypes only, useful with low coverage data (requires \fB\-g, \-\-genotypes\fP) .RE .sp +\fB\-i, \-\-include\fP [\fIqry\fP|\fIgt\fP]:\*(AqEXPRESSION\*(Aq +.RS 4 +Include sites from query file (\fIqry:\fP) or genotype file (\fIgt:\fP) for which \fIEXPRESSION\fP is true. +For valid expressions see \fBEXPRESSIONS\fP. +.RE +.sp \fB\-\-n\-matches\fP \fIINT\fP .RS 4 Print only top INT matches for each sample, 0 for unlimited. Use negative value @@ -2537,6 +2561,16 @@ Disable calculation of HWE probability to reduce memory requirements with comparisons between very large number of sample pairs. .RE .sp +\fB\-o, \-\-output\fP \fIFILE\fP +.RS 4 +Write to \fIFILE\fP rather than to standard output, where it is written by default. +.RE +.sp +\fB\-O, \-\-output\-type\fP \fIt\fP|\fIz\fP +.RS 4 +Write a plain (\fIt\fP) or compressed (\fIz\fP) text tab\-delimited output. +.RE +.sp \fB\-p, \-\-pairs\fP \fILIST\fP .RS 4 A comma\-separated list of sample pairs to compare. When the \fB\-g\fP option is given, the first @@ -2600,7 +2634,7 @@ By default, the PL tag is used in the query file and, when available, the GT tag .br Note that when the requested tag is not available, the program will attempt to use the other tag. The output includes the number of sites that were matched by the four -possible mode (for example GT\-vs\-GT or GT\-vs\-PL). +possible modes (for example GT\-vs\-GT or GT\-vs\-PL). .RE .sp \fBExamples:\fP @@ -2608,10 +2642,10 @@ possible mode (for example GT\-vs\-GT or GT\-vs\-PL). 
.if n .RS 4 .nf .fam C - # Check discordance of all samples from B against all sample in A + # Check discordance of all samples from B against all samples in A bcftools gtcheck \-g A.bcf B.bcf - # Limit comparisons to the fiven list of samples + # Limit comparisons to the given list of samples bcftools gtcheck \-s gt:a1,a2,a3 \-s qry:b1,b2 \-g A.bcf B.bcf # Compare only two pairs a1,b1 and a1,b2 @@ -2642,6 +2676,14 @@ By default, all header lines are displayed. Also display the first \fIINT\fP variant records. By default, no variant records are displayed. .RE +.sp +\fB\-s, \-\-samples\fP \fIINT\fP +.RS 4 +Display the first \fIINT\fP variant records including the last #CHROM header line with samples. +Running with \fB\-s 0\fP alone outputs the #CHROM header line only. Note that +the list of samples, with each sample per line, can be obtained with \f(CRbcftools query\fP using +the option \fB\-l, \-\-list\-samples\fP. +.RE .SS "bcftools index [\fIOPTIONS\fP] \fIin.bcf\fP|\fIin.vcf.gz\fP" .sp Creates index for bgzip compressed VCF/BCF files for random access. CSI @@ -2796,7 +2838,7 @@ see \fBCommon Options\fP .sp \fB\-w, \-\-write\fP \fILIST\fP .RS 4 -list of input files to output given as 1\-based indices. With \fB\-p\fP and no +comma\-separated list of input files to output given as 1\-based indices. With \fB\-p\fP and no \fB\-w\fP, all files are written. .RE .sp @@ -2945,9 +2987,11 @@ maximum number of alternate alleles that can be included in the PL tag. The defa is 0 which disables the feature and outputs values for all alternate alleles. .RE .sp -\fB\-m, \-\-merge\fP \fIsnps\fP|\fIindels\fP|\fIboth\fP|\fIsnp\-ins\-del\fP|\fIall\fP|\fInone\fP|\fIid\fP +\fB\-m, \-\-merge\fP \fIsnps\fP|\fIindels\fP|\fIboth\fP|\fIsnp\-ins\-del\fP|\fIall\fP|\fInone\fP|\fIid\fP[,\fI*\fP] .RS 4 -The option controls what types of multiallelic records can be created: +The option controls what types of multiallelic records can be created. 
If single asterisk +\fI\fB\fP is appended, the unobserved allele \fI<\fP>\fP or \fI\fP will be removed at variant sites; +if two asterisks \fI**\fP are appended, the unobserved allele will be removed all sites. .RE .sp .if n .RS 4 @@ -2957,6 +3001,8 @@ The option controls what types of multiallelic records can be created: \-m snps .. allow multiallelic SNP records \-m indels .. allow multiallelic indel records \-m both .. both SNP and indel records can be multiallelic +\-m both,* .. same as above but remove <*> (or ) from variant sites +\-m both,** .. same as above but remove <*> (or ) at all sites \-m all .. SNP records can be merged with indel records \-m snp\-ins\-del .. allow multiallelic SNVs, insertions, deletions, but don\*(Aqt mix them \-m id .. merge by ID @@ -4428,6 +4474,13 @@ continue even when some samples requested via \fB\-s/\-S\fP do not exist learn by example, see below .RE .sp +\fB\-F, \-\-print\-filtered\fP \fISTR\fP +.RS 4 +by default, samples failing \fB\-i/\-e\fP filtering expressions are suppressed from output +when FORMAT fields are queried (for example \fI%CHROM %POS [ %GT]\fP). With \fB\-F\fP, such +fields will be still printed but instead of their actual value, \fISTR\fP will be used. +.RE +.sp \fB\-H, \-\-print\-header\fP .RS 4 print header @@ -4444,6 +4497,15 @@ include only sites for which \fIEXPRESSION\fP is true. For valid expressions see list sample names and exit .RE .sp +\fB\-N, \-\-disable\-automatic\-newline\fP +.RS 4 +disable automatic addition of a missing newline character at the end of the formatting +expression. By default, the program checks if the expression contains a newline +and appends it if not, to prevent formatting the entire output into a single +line by mistake. Note that versions prior to 1.18 had no automatic check and newline +had to be included explicitly. 
+.RE +.sp \fB\-o, \-\-output\fP \fIFILE\fP .RS 4 see \fBCommon Options\fP @@ -5123,6 +5185,12 @@ Automatically index the output file .RE .SS "Subset options:" .sp +\fB\-A, \-\-trim\-unseen\-alleles\fP +.RS 4 +remove the unseen allele \fI<*>\fP or \fI\fP at variant sites when the option is given once (\-A) or +at all sites when the options is given twice (\fI\-AA\fP). +.RE +.sp \fB\-a, \-\-trim\-alt\-alleles\fP .RS 4 remove alleles not seen in the genotype fields from the ALT column. Note that if no alternate allele @@ -5315,6 +5383,95 @@ important libraries used by bcftools. .SS "bcftools [\fI\-\-version\-only\fP]" .sp Display the full bcftools version number in a machine\-readable format. +.SH "SCRIPTS" +.SS "gff2gff" +.sp +Attempts to fix a GFF file to be correctly parsed by \fBcsq\fP. +.sp +.if n .RS 4 +.nf +.fam C +zcat in.gff.gz | gff2gff | gzip \-c > out.gff.gz +.fam +.fi +.if n .RE +.SS "plot\-vcfstats [\fIOPTIONS\fP] \fIfile.vchk\fP [...]" +.sp +Script for processing output of \fBbcftools stats\fP. It can merge +results from multiple outputs (useful when running the stats for each +chromosome separately), plots graphs and creates a PDF presentation. +.sp +\fB\-m, \-\-merge\fP +.RS 4 +Merge vcfstats files to STDOUT, skip plotting. +.RE +.sp +\fB\-p, \-\-prefix\fP \fIDIR\fP +.RS 4 +The output directory. This directory will be created if it does not exist. +.RE +.sp +\fB\-P, \-\-no\-PDF\fP +.RS 4 +Skip the PDF creation step. +.RE +.sp +\fB\-r, \-\-rasterize\fP +.RS 4 +Rasterize PDF images for faster rendering. This is the default and the opposite of \fB\-v, \-\-vectors\fP. +.RE +.sp +\fB\-s, \-\-sample\-names\fP +.RS 4 +Use sample names for xticks rather than numeric IDs. +.RE +.sp +\fB\-t, \-\-title\fP \fISTRING\fP +.RS 4 +Identify files by these titles in plots. The option can be given multiple +times, for each ID in the \fBbcftools stats\fP output. If not +present, the script will use abbreviated source file names for the titles. 
+.RE +.sp +\fB\-v, \-\-vectors\fP +.RS 4 +Generate vector graphics for PDF images, the opposite of \fB\-r, \-\-rasterize\fP. +.RE +.sp +\fB\-T, \-\-main\-title\fP \fISTRING\fP +.RS 4 +Main title for the PDF. +.RE +.sp +\fBExample:\fP +.sp +.if n .RS 4 +.nf +.fam C +# Generate the stats +bcftools stats \-s \- > file.vchk +.fam +.fi +.if n .RE +.sp +.if n .RS 4 +.nf +.fam C +# Plot the stats +plot\-vcfstats \-p outdir file.vchk +.fam +.fi +.if n .RE +.sp +.if n .RS 4 +.nf +.fam C +# The final looks can be customized by editing the generated +# \*(Aqoutdir/plot.py\*(Aq script and re\-running manually +cd outdir && python plot.py && pdflatex summary.tex +.fam +.fi +.if n .RE .SH "FILTERING EXPRESSIONS" .sp These filtering expressions are accepted by most of the commands. @@ -5330,8 +5487,7 @@ These filtering expressions are accepted by most of the commands. . sp -1 . IP \(bu 2.3 .\} -numerical constants, string constants, file names (this is currently -supported only to filter by the ID column) +numerical constants, string constants, file names (indicated by the prefix \fI@\fP) .sp .if n .RS 4 .nf @@ -5609,7 +5765,7 @@ GT="A" . sp -1 . IP \(bu 2.3 .\} -TYPE for variant type in REF,ALT columns (indel,snp,mnp,ref,bnd,other,overlap). Use the regex +TYPE for variant type in REF,ALT columns (indel,snp,mnp,ref,bnd,other,overlap, see \fBTERMINOLOGY\fP). Use the regex operator "\(rs~" to require at least one allele of the given type or the equal sign "=" to require that all alleles are of the given type. Compare .sp @@ -6052,7 +6208,7 @@ AVG(GQ)>50 .. average (arithmetic mean) of genotype qualities bigger .if n .RS 4 .nf .fam C -ID=@file .. selects lines with ID present in the file +ID=@file .. selects lines with ID present in the file .fam .fi .if n .RE @@ -6060,7 +6216,15 @@ ID=@file .. selects lines with ID present in the file .if n .RS 4 .nf .fam C -ID!=@~/file .. skip lines with ID present in the ~/file +ID!=@~/file .. 
skip lines with ID present in the ~/file +.fam +.fi +.if n .RE +.sp +.if n .RS 4 +.nf +.fam C +INFO/TAG=@file .. selects lines with INFO/TAG value present in the file .fam .fi .if n .RE @@ -6097,92 +6261,25 @@ bcftools view \-i \*(Aq%ID!="." & MAF[0]<0.01\*(Aq .if n .RE .sp Please refer to the documentation of your shell for details. -.SH "SCRIPTS" -.SS "gff2gff" +.SH "TERMINOLOGY" .sp -Attempts to fix a GFF file to be correctly parsed by \fBcsq\fP. +The program and the documentation uses the following terminology, multiple terms can be used +interchangeably for the same VCF record type .sp .if n .RS 4 .nf .fam C -zcat in.gff.gz | gff2gff | gzip \-c > out.gff.gz -.fam -.fi -.if n .RE -.SS "plot\-vcfstats [\fIOPTIONS\fP] \fIfile.vchk\fP [...]" -.sp -Script for processing output of \fBbcftools stats\fP. It can merge -results from multiple outputs (useful when running the stats for each -chromosome separately), plots graphs and creates a PDF presentation. -.sp -\fB\-m, \-\-merge\fP -.RS 4 -Merge vcfstats files to STDOUT, skip plotting. -.RE -.sp -\fB\-p, \-\-prefix\fP \fIDIR\fP -.RS 4 -The output directory. This directory will be created if it does not exist. -.RE -.sp -\fB\-P, \-\-no\-PDF\fP -.RS 4 -Skip the PDF creation step. -.RE -.sp -\fB\-r, \-\-rasterize\fP -.RS 4 -Rasterize PDF images for faster rendering. This is the default and the opposite of \fB\-v, \-\-vectors\fP. -.RE -.sp -\fB\-s, \-\-sample\-names\fP -.RS 4 -Use sample names for xticks rather than numeric IDs. -.RE -.sp -\fB\-t, \-\-title\fP \fISTRING\fP -.RS 4 -Identify files by these titles in plots. The option can be given multiple -times, for each ID in the \fBbcftools stats\fP output. If not -present, the script will use abbreviated source file names for the titles. -.RE -.sp -\fB\-v, \-\-vectors\fP -.RS 4 -Generate vector graphics for PDF images, the opposite of \fB\-r, \-\-rasterize\fP. -.RE -.sp -\fB\-T, \-\-main\-title\fP \fISTRING\fP -.RS 4 -Main title for the PDF. 
-.RE -.sp -\fBExample:\fP -.sp -.if n .RS 4 -.nf -.fam C -# Generate the stats -bcftools stats \-s \- > file.vchk -.fam -.fi -.if n .RE -.sp -.if n .RS 4 -.nf -.fam C -# Plot the stats -plot\-vcfstats \-p outdir file.vchk -.fam -.fi -.if n .RE -.sp -.if n .RS 4 -.nf -.fam C -# The final looks can be customized by editing the generated -# \*(Aqoutdir/plot.py\*(Aq script and re\-running manually -cd outdir && python plot.py && pdflatex summary.tex +REF ALT +\-\-\-\-\-\-\-\-\- +C . .. reference allele / non\-variant site / ref\-only site +C T .. SNP or SNV (single\-nucleotide polymorphism or variant), used interchangeably +CC TT .. MNP (multi\-nucleotide polymorphism) +CAAA C .. indel, deletion (regardless of length) +C CAAA .. indel, insertion (regardless of length) +C <*> .. gVCF block, the allele <*> is a placeholder for alternate allele possibly missed because of low coverage +C .. synonymous to <*> +C * .. overlapping deletion +C .. symbolic allele, known also as \*(Aqother [than above]\*(Aq .fam .fi .if n .RE diff --git a/doc/bcftools.html b/doc/bcftools.html index 0b4baab9e..50336b1c8 100644 --- a/doc/bcftools.html +++ b/doc/bcftools.html @@ -50,7 +50,7 @@

DESCRIPTION

VERSION

-

This manual page was last updated 2023-07-25 and refers to bcftools git version 1.18.

+

This manual page was last updated 2023-12-12 and refers to bcftools git version 1.19.

@@ -720,7 +720,7 @@

bcftools annotate [OPTIONS] FILE

# that INFO/END is already present in the VCF header. bcftools annotate -a annots.tab.gz -c CHROM,POS,~ID,REF,ALT,INFO/END input.vcf - # For more examples see http://samtools.github.io/bcftools/howtos/annotate.html + # For (many) more examples see http://samtools.github.io/bcftools/howtos/annotate.html
@@ -830,6 +830,10 @@

Input/output options:

output all alternate alleles present in the alignments even if they do not appear in any of the genotypes

+
-*, --keep-unseen-allele
+
+

keep the unobserved allele <*> or <NON_REF>, useful mainly for gVCF output

+
-f, --format-fields list

comma-separated list of FORMAT fields to output for each sample. Currently @@ -2196,7 +2200,12 @@

bcftools gtcheck [OPTIONS] [-g ge

Stop after first record to estimate required time.

-
-e, --error-probability INT
+
-e, --exclude [qry|gt]:'EXPRESSION'
+
+

Exclude sites from query file (qry:) or genotype file (gt:) for which EXPRESSION is true. +For valid expressions see EXPRESSIONS.

+
+
-E, --error-probability INT

Interpret genotypes and genotype likelihoods probabilistically. The value of INT represents genotype quality when GT tag is used (e.g. Q=30 represents one error in 1,000 genotypes and @@ -2204,11 +2213,16 @@

bcftools gtcheck [OPTIONS] [-g ge non-zero integer can be provided).  
 
-If -e is set to 0, the discordance score can be interpreted as the number of mismatching genotypes, +If -E is set to 0, the discordance score can be interpreted as the number of mismatching genotypes, but only in the GT-vs-GT matching mode. See the -u, --use option below for additional notes and caveats.  
 
-If performance is an issue, set -e 0 for faster run times but less accurate results.

+If performance is an issue, set -E 0 for faster run times but less accurate results. + 

Note that in previous versions of bcftools (<=1.18), this option used to be the lower-case -e. It +was changed to make room for the filtering option -e, --exclude, to stay consistent with other +commands.

-g, --genotypes FILE
@@ -2218,6 +2232,11 @@

bcftools gtcheck [OPTIONS] [-g ge

Homozygous genotypes only, useful with low coverage data (requires -g, --genotypes)

+
-i, --include [qry|gt]:'EXPRESSION'
+
+

Include sites from query file (qry:) or genotype file (gt:) for which EXPRESSION is true. +For valid expressions see EXPRESSIONS.

+
--n-matches INT

Print only top INT matches for each sample, 0 for unlimited. Use negative value @@ -2229,6 +2248,14 @@

bcftools gtcheck [OPTIONS] [-g ge

Disable calculation of HWE probability to reduce memory requirements with comparisons between very large number of sample pairs.

+
-o, --output FILE
+
+

Write to FILE rather than to standard output, where it is written by default.

+
+
-O, --output-type t|z
+
+

Write plain (t) or compressed (z) tab-delimited text output.

+
-p, --pairs LIST

A comma-separated list of sample pairs to compare. When the -g option is given, the first @@ -2288,7 +2315,7 @@

bcftools gtcheck [OPTIONS] [-g ge  
Note that when the requested tag is not available, the program will attempt to use the other tag. The output includes the number of sites that were matched by the four -possible mode (for example GT-vs-GT or GT-vs-PL).

+possible modes (for example GT-vs-GT or GT-vs-PL).

@@ -2297,10 +2324,10 @@

bcftools gtcheck [OPTIONS] [-g ge
-
   # Check discordance of all samples from B against all sample in A
+
   # Check discordance of all samples from B against all samples in A
    bcftools gtcheck -g A.bcf B.bcf
 
-   # Limit comparisons to the fiven list of samples
+   # Limit comparisons to the given list of samples
    bcftools gtcheck -s gt:a1,a2,a3 -s qry:b1,b2 -g A.bcf B.bcf
 
    # Compare only two pairs a1,b1 and a1,b2
@@ -2335,6 +2362,13 @@ 

Options:

Also display the first INT variant records. By default, no variant records are displayed.

+
-s, --samples INT
+
+

Display the first INT variant records including the last #CHROM header line with samples. +Running with -s 0 alone outputs the #CHROM header line only. Note that +the list of samples, one sample per line, can be obtained with bcftools query using +the option -l, --list-samples.

+
@@ -2487,7 +2521,7 @@

bcftools isec [OPTIONS] A.vcf.gz B.vcf.gz
-w, --write LIST
-

list of input files to output given as 1-based indices. With -p and no +

comma-separated list of input files to output given as 1-based indices. With -p and no -w, all files are written.

--write-index
@@ -2625,9 +2659,11 @@

bcftools merge [OPTIONS] A.vcf.gz B.vcf.gz< maximum number of alternate alleles that can be included in the PL tag. The default value is 0 which disables the feature and outputs values for all alternate alleles.

-
-m, --merge snps|indels|both|snp-ins-del|all|none|id
+
-m, --merge snps|indels|both|snp-ins-del|all|none|id[,*]
-

The option controls what types of multiallelic records can be created:

+

The option controls what types of multiallelic records can be created. If a single asterisk +* is appended, the unobserved allele <*> or <NON_REF> will be removed at variant sites; +if two asterisks ** are appended, the unobserved allele will be removed at all sites.

@@ -2637,6 +2673,8 @@

bcftools merge [OPTIONS] A.vcf.gz B.vcf.gz< -m snps .. allow multiallelic SNP records -m indels .. allow multiallelic indel records -m both .. both SNP and indel records can be multiallelic +-m both,* .. same as above but remove <*> (or <NON_REF>) from variant sites +-m both,** .. same as above but remove <*> (or <NON_REF>) at all sites -m all .. SNP records can be merged with indel records -m snp-ins-del .. allow multiallelic SNVs, insertions, deletions, but don't mix them -m id .. merge by ID @@ -3849,6 +3887,12 @@

bcftools query [OPTIONS] file.vcf.gz [file.

learn by example, see below

+
-F, --print-filtered STR
+
+

by default, samples failing -i/-e filtering expressions are suppressed from output +when FORMAT fields are queried (for example %CHROM %POS [ %GT]). With -F, such +fields will still be printed but instead of their actual value, STR will be used.

+
-H, --print-header

print header

@@ -3862,6 +3906,14 @@

bcftools query [OPTIONS] file.vcf.gz [file.

list sample names and exit

+
-N, --disable-automatic-newline
+
+

disable automatic addition of a missing newline character at the end of the formatting +expression. By default, the program checks if the expression contains a newline +and appends it if not, to prevent formatting the entire output into a single +line by mistake. Note that versions prior to 1.18 had no automatic check and the newline +had to be included explicitly.

+
-o, --output FILE

see Common Options

@@ -4488,6 +4540,11 @@

Output options

Subset options:

+
-A, --trim-unseen-alleles
+
+

remove the unseen allele <*> or <NON_REF> at variant sites when the option is given once (-A) or +at all sites when the option is given twice (-AA).

+
-a, --trim-alt-alleles

remove alleles not seen in the genotype fields from the ALT column. Note that if no alternate allele @@ -4680,6 +4737,98 @@

bcftools [--version-only]

+

SCRIPTS

+
+
+

gff2gff

+
+

Attempts to fix a GFF file to be correctly parsed by csq.

+
+
+
+
+
+
zcat in.gff.gz | gff2gff | gzip -c > out.gff.gz
+
+
+
+
+
+
+

plot-vcfstats [OPTIONS] file.vchk […​]

+
+

Script for processing output of bcftools stats. It can merge +results from multiple outputs (useful when running the stats for each +chromosome separately), plot graphs and create a PDF presentation.

+
+
+
+
-m, --merge
+
+

Merge vcfstats files to STDOUT, skip plotting.

+
+
-p, --prefix DIR
+
+

The output directory. This directory will be created if it does not exist.

+
+
-P, --no-PDF
+
+

Skip the PDF creation step.

+
+
-r, --rasterize
+
+

Rasterize PDF images for faster rendering. This is the default and the opposite of -v, --vectors.

+
+
-s, --sample-names
+
+

Use sample names for xticks rather than numeric IDs.

+
+
-t, --title STRING
+
+

Identify files by these titles in plots. The option can be given multiple +times, for each ID in the bcftools stats output. If not +present, the script will use abbreviated source file names for the titles.

+
+
-v, --vectors
+
+

Generate vector graphics for PDF images, the opposite of -r, --rasterize.

+
+
-T, --main-title STRING
+
+

Main title for the PDF.

+
+
+
+
+

Example:

+
+
+
+
+
+
# Generate the stats
+bcftools stats -s - > file.vchk
+
+
+
+
+
# Plot the stats
+plot-vcfstats -p outdir file.vchk
+
+
+
+
+
# The final looks can be customized by editing the generated
+# 'outdir/plot.py' script and re-running manually
+cd outdir && python plot.py && pdflatex summary.tex
+
+
+
+
+
+
+
+

FILTERING EXPRESSIONS

@@ -4689,8 +4838,7 @@

FILTERING EXPRESSIONS

Valid expressions may contain:
  • -

    numerical constants, string constants, file names (this is currently -supported only to filter by the ID column)

    +

    numerical constants, string constants, file names (indicated by the prefix @)

    1, 1.0, 1e-4
    @@ -4824,7 +4972,7 @@ 

    FILTERING EXPRESSIONS

  • -

    TYPE for variant type in REF,ALT columns (indel,snp,mnp,ref,bnd,other,overlap). Use the regex +

    TYPE for variant type in REF,ALT columns (indel,snp,mnp,ref,bnd,other,overlap, see TERMINOLOGY). Use the regex operator "\~" to require at least one allele of the given type or the equal sign "=" to require that all alleles are of the given type. Compare

    @@ -5072,12 +5220,17 @@

    FILTERING EXPRESSIONS

    -
    ID=@file       .. selects lines with ID present in the file
    +
    ID=@file               .. selects lines with ID present in the file
    +
    +
    +
    +
    +
    ID!=@~/file            .. skip lines with ID present in the ~/file
    -
    ID!=@~/file    .. skip lines with ID present in the ~/file
    +
    INFO/TAG=@file         .. selects lines with INFO/TAG value present in the file
    @@ -5116,91 +5269,27 @@

    FILTERING EXPRESSIONS

-

SCRIPTS

+

TERMINOLOGY

-
-

gff2gff

-
-

Attempts to fix a GFF file to be correctly parsed by csq.

-
-
-
-
-
-
zcat in.gff.gz | gff2gff | gzip -c > out.gff.gz
-
-
-
-
-
-
-

plot-vcfstats [OPTIONS] file.vchk […​]

-
-

Script for processing output of bcftools stats. It can merge -results from multiple outputs (useful when running the stats for each -chromosome separately), plots graphs and creates a PDF presentation.

-
-
-
-
-m, --merge
-
-

Merge vcfstats files to STDOUT, skip plotting.

-
-
-p, --prefix DIR
-
-

The output directory. This directory will be created if it does not exist.

-
-
-P, --no-PDF
-
-

Skip the PDF creation step.

-
-
-r, --rasterize
-
-

Rasterize PDF images for faster rendering. This is the default and the opposite of -v, --vectors.

-
-
-s, --sample-names
-
-

Use sample names for xticks rather than numeric IDs.

-
-
-t, --title STRING
-
-

Identify files by these titles in plots. The option can be given multiple -times, for each ID in the bcftools stats output. If not -present, the script will use abbreviated source file names for the titles.

-
-
-v, --vectors
-
-

Generate vector graphics for PDF images, the opposite of -r, --rasterize.

-
-
-T, --main-title STRING
-
-

Main title for the PDF.

-
-
-
-

Example:

+

The program and the documentation use the following terminology; multiple terms can be used +interchangeably for the same VCF record type:

-
# Generate the stats
-bcftools stats -s - > file.vchk
-
-
-
-
-
# Plot the stats
-plot-vcfstats -p outdir file.vchk
-
-
-
-
-
# The final looks can be customized by editing the generated
-# 'outdir/plot.py' script and re-running manually
-cd outdir && python plot.py && pdflatex summary.tex
-
+
REF   ALT
+---------
+C     .         .. reference allele / non-variant site / ref-only site
+C     T         .. SNP or SNV (single-nucleotide polymorphism or variant), used interchangeably
+CC    TT        .. MNP (multi-nucleotide polymorphism)
+CAAA  C         .. indel, deletion (regardless of length)
+C     CAAA      .. indel, insertion (regardless of length)
+C     <*>       .. gVCF block, the allele <*> is a placeholder for alternate allele possibly missed because of low coverage
+C     <NON_REF> .. synonymous to <*>
+C     *         .. overlapping deletion
+C     <INS>     .. symbolic allele, known also as 'other [than above]'
@@ -5277,7 +5366,7 @@

COPYING

diff --git a/doc/bcftools.txt b/doc/bcftools.txt index b2dcaf6c2..4ac809270 100644 --- a/doc/bcftools.txt +++ b/doc/bcftools.txt @@ -542,7 +542,7 @@ Add or remove annotations. # that INFO/END is already present in the VCF header. bcftools annotate -a annots.tab.gz -c CHROM,POS,~ID,REF,ALT,INFO/END input.vcf - # For more examples see http://samtools.github.io/bcftools/howtos/annotate.html + # For (many) more examples see http://samtools.github.io/bcftools/howtos/annotate.html ---- @@ -624,6 +624,9 @@ demand. The original calling model can be invoked with the *-c* option. output all alternate alleles present in the alignments even if they do not appear in any of the genotypes +*-***, --keep-unseen-allele*:: + keep the unobserved allele <*> or <NON_REF>, useful mainly for gVCF output + *-f, --format-fields* 'list':: comma-separated list of FORMAT fields to output for each sample. Currently GQ and GP fields are supported. For convenience, the fields can be given @@ -1637,18 +1640,27 @@ The discordance score can be interpreted as the number of mismatching genotypes *--dry-run*:: Stop after first record to estimate required time. -*-e, --error-probability* 'INT':: +*-e, --exclude* ['qry'|'gt']:'EXPRESSION':: + Exclude sites from query file ('qry:') or genotype file ('gt:') for which 'EXPRESSION' is true. + For valid expressions see *<>*. + +*-E, --error-probability* 'INT':: Interpret genotypes and genotype likelihoods probabilistically. The value of 'INT' represents genotype quality when GT tag is used (e.g. Q=30 represents one error in 1,000 genotypes and Q=40 one error in 10,000 genotypes) and is ignored when PL tag is used (in that case an arbitrary non-zero integer can be provided). {nbsp} + {nbsp} + - If *-e* is set to 0, the discordance score can be interpreted as the number of mismatching genotypes, + If *-E* is set to 0, the discordance score can be interpreted as the number of mismatching genotypes, but only in the GT-vs-GT matching mode.
See the *-u, --use* option below for additional notes and caveats. {nbsp} + {nbsp} + - If performance is an issue, set *-e 0* for faster run times but less accurate results. + If performance is an issue, set *-E 0* for faster run times but less accurate results. + {nbsp} + + {nbsp} + + Note that in previous versions of bcftools (<=1.18), this option used to be the lower-case *-e*. It + was changed to make room for the filtering option *-e, --exclude* to stay consistent across other + commands. *-g, --genotypes* 'FILE':: VCF/BCF file with reference genotypes to compare against @@ -1656,6 +1668,10 @@ The discordance score can be interpreted as the number of mismatching genotypes *-H, --homs-only*:: Homozygous genotypes only, useful with low coverage data (requires *-g, --genotypes*) +*-i, --include* ['qry'|'gt']:'EXPRESSION':: + Include sites from query file ('qry:') or genotype file ('gt:') for which 'EXPRESSION' is true. + For valid expressions see *<>*. + *--n-matches* 'INT':: Print only top INT matches for each sample, 0 for unlimited. Use negative value to sort by HWE probability rather than the number of discordant sites. Note @@ -1665,6 +1681,12 @@ The discordance score can be interpreted as the number of mismatching genotypes Disable calculation of HWE probability to reduce memory requirements with comparisons between very large number of sample pairs. +*-o, --output* 'FILE':: + Write to 'FILE' rather than to standard output, where it is written by default. + +*-O, --output-type* 't'|'z':: + Write a plain ('t') or compressed ('z') text tab-delimited output. + *-p, --pairs* 'LIST':: A comma-separated list of sample pairs to compare.
When the *-g* option is given, the first sample must be from the query file, the second from the *-g* file, third from the query file @@ -1709,14 +1731,14 @@ The discordance score can be interpreted as the number of mismatching genotypes {nbsp} + Note that when the requested tag is not available, the program will attempt to use the other tag. The output includes the number of sites that were matched by the four - possible mode (for example GT-vs-GT or GT-vs-PL). + possible modes (for example GT-vs-GT or GT-vs-PL). *Examples:* ---- - # Check discordance of all samples from B against all sample in A + # Check discordance of all samples from B against all samples in A bcftools gtcheck -g A.bcf B.bcf - # Limit comparisons to the fiven list of samples + # Limit comparisons to the given list of samples bcftools gtcheck -s gt:a1,a2,a3 -s qry:b1,b2 -g A.bcf B.bcf # Compare only two pairs a1,b1 and a1,b2 @@ -1773,6 +1795,12 @@ present, but it never adds version or command line information itself. Also display the first 'INT' variant records. By default, no variant records are displayed. +*-s, --samples* 'INT':: + Display the first 'INT' variant records including the last #CHROM header line with samples. + Running with *-s 0* alone outputs the #CHROM header line only. Note that + the list of samples, with each sample per line, can be obtained with `bcftools query` using + the option *-l, --list-samples*. + [[index]] === bcftools index ['OPTIONS'] 'in.bcf'|'in.vcf.gz' @@ -1880,7 +1908,7 @@ in the other files. see *<>* *-w, --write* 'LIST':: - list of input files to output given as 1-based indices. With *-p* and no + comma-separated list of input files to output given as 1-based indices. With *-p* and no *-w*, all files are written. *--write-index*:: @@ -1985,13 +2013,17 @@ For "vertical" merge take a look at *<>* or *<' or '' will be removed at variant sites; + if two asterisks '**' are appended, the unobserved allele will be removed all sites. ---- -m none .. 
no new multiallelics, output multiple records instead -m snps .. allow multiallelic SNP records -m indels .. allow multiallelic indel records -m both .. both SNP and indel records can be multiallelic +-m both,* .. same as above but remove <*> (or <NON_REF>) from variant sites +-m both,** .. same as above but remove <*> (or <NON_REF>) at all sites -m all .. SNP records can be merged with indel records -m snp-ins-del .. allow multiallelic SNVs, insertions, deletions, but don't mix them -m id .. merge by ID @@ -2894,6 +2926,11 @@ Extracts fields from VCF or BCF files and outputs them in user-defined format. *-f, --format* 'FORMAT':: learn by example, see below +*-F, --print-filtered* 'STR':: + by default, samples failing *-i/-e* filtering expressions are suppressed from output + when FORMAT fields are queried (for example '%CHROM %POS [ %GT]'). With *-F*, such + fields will still be printed but instead of their actual value, 'STR' will be used. + *-H, --print-header*:: print header @@ -2904,6 +2941,13 @@ Extracts fields from VCF or BCF files and outputs them in user-defined format. *-l, --list-samples*:: list sample names and exit +*-N, --disable-automatic-newline*:: + disable automatic addition of a missing newline character at the end of the formatting + expression. By default, the program checks if the expression contains a newline + and appends it if not, to prevent formatting the entire output into a single + line by mistake. Note that versions prior to 1.18 had no automatic check and newline + had to be included explicitly. + *-o, --output* 'FILE':: see *<>* @@ -3357,6 +3401,10 @@ Convert between VCF and BCF. Former *bcftools subset*. ==== Subset options: +*-A, --trim-unseen-alleles*:: + remove the unseen allele '<*>' or '<NON_REF>' at variant sites when the option is given once (-A) or + at all sites when the option is given twice ('-AA'). + *-a, --trim-alt-alleles*:: remove alleles not seen in the genotype fields from the ALT column.
Note that if no alternate allele remains after trimming, the record itself is not removed but ALT is set to ".". @@ -3505,6 +3553,64 @@ important libraries used by bcftools. Display the full bcftools version number in a machine-readable format. +SCRIPTS +------- + +[[gff2gff]] +=== gff2gff +Attempts to fix a GFF file to be correctly parsed by *<>*. + +-- + zcat in.gff.gz | gff2gff | gzip -c > out.gff.gz +-- + + +[[plot-vcfstats]] +=== plot-vcfstats ['OPTIONS'] 'file.vchk' [...] +Script for processing output of *<>*. It can merge +results from multiple outputs (useful when running the stats for each +chromosome separately), plots graphs and creates a PDF presentation. + +*-m, --merge*:: + Merge vcfstats files to STDOUT, skip plotting. + +*-p, --prefix* 'DIR':: + The output directory. This directory will be created if it does not exist. + +*-P, --no-PDF*:: + Skip the PDF creation step. + +*-r, --rasterize*:: + Rasterize PDF images for faster rendering. This is the default and the opposite of *-v, --vectors*. + +*-s, --sample-names*:: + Use sample names for xticks rather than numeric IDs. + +*-t, --title* 'STRING':: + Identify files by these titles in plots. The option can be given multiple + times, for each ID in the *<>* output. If not + present, the script will use abbreviated source file names for the titles. + +*-v, --vectors*:: + Generate vector graphics for PDF images, the opposite of *-r, --rasterize*. + +*-T, --main-title* 'STRING':: + Main title for the PDF. + +*Example:* +-- + # Generate the stats + bcftools stats -s - > file.vchk + + # Plot the stats + plot-vcfstats -p outdir file.vchk + + # The final looks can be customized by editing the generated + # 'outdir/plot.py' script and re-running manually + cd outdir && python plot.py && pdflatex summary.tex +-- + + [[expressions]] FILTERING EXPRESSIONS --------------------- @@ -3513,8 +3619,7 @@ These filtering expressions are accepted by most of the commands. 
.Valid expressions may contain: -* numerical constants, string constants, file names (this is currently - supported only to filter by the ID column) +* numerical constants, string constants, file names (indicated by the prefix '@') 1, 1.0, 1e-4 "String" @@ -3597,7 +3702,7 @@ ref-ref hom, alt-alt hom, ref-alt het, alt-alt het, haploid ref, haploid alt GT="R" GT="A" -* TYPE for variant type in REF,ALT columns (indel,snp,mnp,ref,bnd,other,overlap). Use the regex +* TYPE for variant type in REF,ALT columns (indel,snp,mnp,ref,bnd,other,overlap, see *<>*). Use the regex operator "\~" to require at least one allele of the given type or the equal sign "=" to require that all alleles are of the given type. Compare @@ -3741,9 +3846,11 @@ used on the result. For example, when querying "TAG=1,2,3,4", it will be evaluat AVG(GQ)>50 .. average (arithmetic mean) of genotype qualities bigger than 50 - ID=@file .. selects lines with ID present in the file + ID=@file .. selects lines with ID present in the file - ID!=@~/file .. skip lines with ID present in the ~/file + ID!=@~/file .. skip lines with ID present in the ~/file + + INFO/TAG=@file .. selects lines with INFO/TAG value present in the file MAF[0]<0.05 .. select rare variants at 5% cutoff @@ -3765,63 +3872,26 @@ that the whole expression is passed to the program as intended: Please refer to the documentation of your shell for details. -SCRIPTS -------- - -[[gff2gff]] -=== gff2gff -Attempts to fix a GFF file to be correctly parsed by *<>*. - +[[terminology]] +TERMINOLOGY +----------- +The program and the documentation uses the following terminology, multiple terms can be used +interchangeably for the same VCF record type -- - zcat in.gff.gz | gff2gff | gzip -c > out.gff.gz + REF ALT + --------- + C . .. reference allele / non-variant site / ref-only site + C T .. SNP or SNV (single-nucleotide polymorphism or variant), used interchangeably + CC TT .. MNP (multi-nucleotide polymorphism) + CAAA C .. 
indel, deletion (regardless of length) + C CAAA .. indel, insertion (regardless of length) + C <*> .. gVCF block, the allele <*> is a placeholder for alternate allele possibly missed because of low coverage + C <NON_REF> .. synonymous to <*> + C * .. overlapping deletion + C <INS> .. symbolic allele, known also as 'other [than above]' -- -[[plot-vcfstats]] -=== plot-vcfstats ['OPTIONS'] 'file.vchk' [...] -Script for processing output of *<>*. It can merge -results from multiple outputs (useful when running the stats for each -chromosome separately), plots graphs and creates a PDF presentation. - -*-m, --merge*:: - Merge vcfstats files to STDOUT, skip plotting. - -*-p, --prefix* 'DIR':: - The output directory. This directory will be created if it does not exist. - -*-P, --no-PDF*:: - Skip the PDF creation step. - -*-r, --rasterize*:: - Rasterize PDF images for faster rendering. This is the default and the opposite of *-v, --vectors*. - -*-s, --sample-names*:: - Use sample names for xticks rather than numeric IDs. - -*-t, --title* 'STRING':: - Identify files by these titles in plots. The option can be given multiple - times, for each ID in the *<>* output. If not - present, the script will use abbreviated source file names for the titles. - -*-v, --vectors*:: - Generate vector graphics for PDF images, the opposite of *-r, --rasterize*. - -*-T, --main-title* 'STRING':: - Main title for the PDF. - -*Example:* --- - # Generate the stats - bcftools stats -s - > file.vchk - - # Plot the stats - plot-vcfstats -p outdir file.vchk - - # The final looks can be customized by editing the generated - # 'outdir/plot.py' script and re-running manually - cd outdir && python plot.py && pdflatex summary.tex --- - PERFORMANCE ----------- HTSlib was designed with BCF format in mind.
When parsing VCF files, all records diff --git a/filter.c b/filter.c index b6547f81f..898d79ef4 100644 --- a/filter.c +++ b/filter.c @@ -163,6 +163,8 @@ static int op_prec[] = {0,1,1,5,5,5,5,5,5,2,3, 6, 6, 7, 7, 8, 8, 8, 3, 2, 5, 5, #define TOKEN_STRING "x()[<=>]!|&+-*/MmaAO~^S.lfcpis" // this is only for debugging, not maintained diligently static void cmp_vector_strings(token_t *atok, token_t *btok, token_t *rtok); +inline static void tok_init_samples(token_t *atok, token_t *btok, token_t *rtok); + // Return negative values if it is a function with variable number of arguments static int filters_next_token(char **str, int *len) @@ -596,6 +598,98 @@ static void filters_cmp_filter(token_t *atok, token_t *btok, token_t *rtok, bcf1 error("Only ==, !=, ~, and !~ operators are supported for FILTER\n"); return; } +static void filters_cmp_string_hash(token_t *atok, token_t *btok, token_t *rtok, bcf1_t *line) +{ + if ( btok->hash ) + { + token_t *tmp = atok; atok = btok; btok = tmp; + } + if ( rtok->tok_type!=TOK_EQ && rtok->tok_type!=TOK_NE ) + error("Only == and != operators are supported for strings read from a file\n"); + + // INFO + if ( !btok->nsamples ) + { + // there is only one string value, e.g. STR[1]=@list.txt + if ( btok->idx >= 0 ) + { + int ret = khash_str2int_has_key(atok->hash, btok->str_value.s); + if ( rtok->tok_type==TOK_NE ) ret = ret ? 0 : 1; + rtok->pass_site = ret; + return; + } + + // there can be multiple comma-separated string values, e.g. STR=@list.txt or STR[*]=@list.txt + int ret = 0; + char *ptr = btok->str_value.s; + while ( *ptr ) + { + char *eptr = ptr + 1; + while ( *eptr && *eptr!=',' ) eptr++; + char keep = *eptr; + *eptr = 0; + ret |= khash_str2int_has_key(atok->hash, ptr); + *eptr = keep; + if ( !keep ) break; + ptr = eptr + 1; + } + if ( rtok->tok_type==TOK_NE ) ret = ret ? 
0 : 1; + rtok->pass_site = ret; + return; + } + + + // FORMAT + tok_init_samples(atok, btok, rtok); + rtok->pass_site = 0; + int i; + + // there is only one string value, e.g. FMT/STR[*:1]=@list.txt + if ( btok->idx >= 0 ) + { + for (i=0; insamples; i++) + { + if ( !rtok->usmpl[i] ) continue; + char *str = btok->str_value.s + i*btok->nval1; + char keep = str[btok->nval1]; + str[btok->nval1] = 0; + int ret = khash_str2int_has_key(atok->hash, str); + str[btok->nval1] = keep; + if ( rtok->tok_type==TOK_NE ) ret = ret ? 0 : 1; + rtok->pass_samples[i] = ret; + rtok->pass_site |= ret; + } + return; + } + + // there can be multiple comma-separated string values, e.g. FMT/STR=@list.txt + for (i=0; insamples; i++) + { + if ( !rtok->usmpl[i] ) continue; + char *str = btok->str_value.s + i*btok->nval1; + char keep = str[btok->nval1]; + str[btok->nval1] = 0; + + // now str contains the block of per-sample comma-separated strings to loop over + int ret = 0; + char *ptr = str; + while ( *ptr ) + { + char *eptr = ptr + 1; + while ( *eptr && *eptr!=',' ) eptr++; + char keep0 = *eptr; + *eptr = 0; + ret |= khash_str2int_has_key(atok->hash, ptr); + *eptr = keep0; + if ( !keep0 ) break; + ptr = eptr + 1; + } + str[btok->nval1] = keep; + if ( rtok->tok_type==TOK_NE ) ret = ret ? 
0 : 1; + rtok->pass_samples[i] = ret; + rtok->pass_site |= ret; + } +} static void filters_cmp_id(token_t *atok, token_t *btok, token_t *rtok, bcf1_t *line) { if ( btok->hash ) @@ -1008,7 +1102,8 @@ static void filters_set_format_string(filter_t *flt, bcf1_t *line, token_t *tok) int i, ndim = tok->str_value.m; int nstr = bcf_get_format_char(flt->hdr, line, tok->tag, &tok->str_value.s, &ndim); - tok->str_value.m = ndim; + tok->str_value.m = tok->str_value.l = ndim; + kputc(0,&tok->str_value); // append the nul byte tok->str_value.l = tok->nvalues = 0; if ( nstr<0 ) return; @@ -3486,7 +3581,10 @@ static filter_t *filter_init_(bcf_hdr_t *hdr, const char *str, int exit_on_error { int j = out[i+1].tok_type==TOK_VAL ? i+1 : i-1; if ( out[j].comparator!=filters_cmp_id ) - error("Error: could not parse the expression. Note that the \"@file_name\" syntax can be currently used with ID column only.\n"); + { + if ( out[j].comparator ) error("Error: could not parse the expression with \"@file_name\" syntax (possible todo)\n"); + out[j].comparator = filters_cmp_string_hash; + } } if ( out[i].tok_type==TOK_OR || out[i].tok_type==TOK_OR_VEC ) out[i].func = vector_logic_or; diff --git a/gff.c b/gff.c index 90da84ba9..5d9f062be 100644 --- a/gff.c +++ b/gff.c @@ -23,6 +23,20 @@ THE SOFTWARE. */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "bcftools.h" +#include "regidx.h" #include "gff.h" /* @@ -727,12 +741,12 @@ static void tscript_init_cds(gff_t *gff) if ( phase!=len%3 ) { if ( !gff->force ) - error("Error: GFF3 assumption failed for transcript %s, CDS=%d: phase!=len%%3 (phase=%d, len=%d). Use the --force option to proceed anyway (at your own risk).\n", + error("Error: GFF3 assumption failed for transcript %s, CDS=%"PRIu32": phase!=len%%3 (phase=%d, len=%d). 
Use the --force option to proceed anyway (at your own risk).\n", gff->tscript_ids.str[tr->id],tr->cds[i]->beg+1,phase,len); if ( gff->verbosity > 0 ) { if ( !gff->warned.wrong_phase || gff->verbosity > 1 ) - fprintf(stderr,"Warning: The GFF has inconsistent phase column in transcript %s, skipping. CDS pos=%d: phase!=len%%3 (phase=%d, len=%d)\n", + fprintf(stderr,"Warning: The GFF has inconsistent phase column in transcript %s, skipping. CDS pos=%"PRIu32": phase!=len%%3 (phase=%d, len=%d)\n", gff->tscript_ids.str[tr->id],tr->cds[i]->beg+1,phase,len); gff->warned.wrong_phase++; } @@ -790,12 +804,12 @@ static void tscript_init_cds(gff_t *gff) if ( phase!=len%3 ) { if ( !gff->force ) - error("Error: GFF3 assumption failed for transcript %s, CDS=%d: phase!=len%%3 (phase=%d, len=%d). Use the --force option to proceed anyway (at your own risk).\n", + error("Error: GFF3 assumption failed for transcript %s, CDS=%"PRIu32": phase!=len%%3 (phase=%d, len=%d). Use the --force option to proceed anyway (at your own risk).\n", gff->tscript_ids.str[tr->id],tr->cds[i]->beg+1,phase,len); if ( gff->verbosity > 0 ) { if ( !gff->warned.wrong_phase || gff->verbosity > 1 ) - fprintf(stderr,"Warning: The GFF has inconsistent phase column in transcript %s, skipping. CDS pos=%d: phase!=len%%3 (phase=%d, len=%d)\n", + fprintf(stderr,"Warning: The GFF has inconsistent phase column in transcript %s, skipping. 
CDS pos=%"PRIu32": phase!=len%%3 (phase=%d, len=%d)\n", gff->tscript_ids.str[tr->id],tr->cds[i]->beg+1,phase,len); gff->warned.wrong_phase++; } @@ -896,7 +910,7 @@ static int gff_dump(gff_t *gff, const char *fname) gf_gene_t *gene = (gf_gene_t*) kh_val(gff->init.gid2gene, k); char *gene_id = gff->init.gene_ids.str[gene->id]; str.l = 0; - ksprintf(&str,"%s\t.\tgene\t%d\t%d\t.\t%c\t.\tID=%s;Name=%s;used=%d\n",gff->init.seq[gene->iseq],gene->beg+1,gene->end+1,gene->strand==STRAND_FWD?'+':'-',gene_id,gene->name,gene->used); + ksprintf(&str,"%s\t.\tgene\t%"PRIu32"\t%"PRIu32"\t.\t%c\t.\tID=%s;Name=%s;used=%d\n",gff->init.seq[gene->iseq],gene->beg+1,gene->end+1,gene->strand==STRAND_FWD?'+':'-',gene_id,gene->name,gene->used); if ( bgzf_write(out, str.s, str.l) != str.l ) error("Error writing %s: %s\n", fname, strerror(errno)); } @@ -907,7 +921,7 @@ static int gff_dump(gff_t *gff, const char *fname) char *gene_id = gff->init.gene_ids.str[tr->gene->id]; const char *type = tr->type==GF_PROTEIN_CODING ? 
"mRNA" : gf_type2gff_string(tr->type); str.l = 0; - ksprintf(&str,"%s\t.\t%s\t%d\t%d\t.\t%c\t.\tID=%s;Parent=%s;biotype=%s;used=%d\n",itr->seq,type,itr->beg+1,itr->end+1,tr->strand==STRAND_FWD?'+':'-',gff->tscript_ids.str[tr->id],gene_id,gf_type2gff_string(tr->type),tr->used); + ksprintf(&str,"%s\t.\t%s\t%"PRIu32"\t%"PRIu32"\t.\t%c\t.\tID=%s;Parent=%s;biotype=%s;used=%d\n",itr->seq,type,itr->beg+1,itr->end+1,tr->strand==STRAND_FWD?'+':'-',gff->tscript_ids.str[tr->id],gene_id,gf_type2gff_string(tr->type),tr->used); if ( bgzf_write(out, str.s, str.l) != str.l ) error("Error writing %s: %s\n", fname, strerror(errno)); } regitr_destroy(itr); @@ -918,7 +932,7 @@ static int gff_dump(gff_t *gff, const char *fname) gf_cds_t *cds = regitr_payload(itr,gf_cds_t*); gf_tscript_t *tr = cds->tr; str.l = 0; - ksprintf(&str,"%s\t.\tCDS\t%d\t%d\t.\t%c\t%c\tParent=%s\n",itr->seq,cds->beg+1,cds->beg+cds->len,tr->strand==STRAND_FWD?'+':'-',cds->phase==3?'.':cds->phase+(int)'0',gff->tscript_ids.str[tr->id]); + ksprintf(&str,"%s\t.\tCDS\t%"PRIu32"\t%"PRIu32"\t.\t%c\t%c\tParent=%s\n",itr->seq,cds->beg+1,cds->beg+cds->len,tr->strand==STRAND_FWD?'+':'-',cds->phase==3?'.':cds->phase+(int)'0',gff->tscript_ids.str[tr->id]); if ( bgzf_write(out, str.s, str.l) != str.l ) error("Error writing %s: %s\n", fname, strerror(errno)); } regitr_destroy(itr); @@ -929,7 +943,7 @@ static int gff_dump(gff_t *gff, const char *fname) gf_utr_t *utr = regitr_payload(itr,gf_utr_t*); gf_tscript_t *tr = utr->tr; str.l = 0; - ksprintf(&str,"%s\t.\t%s_prime_UTR\t%d\t%d\t.\t%c\t.\tParent=%s\n",itr->seq,utr->which==prime3?"three":"five",utr->beg+1,utr->end+1,tr->strand==STRAND_FWD?'+':'-',gff->tscript_ids.str[tr->id]); + ksprintf(&str,"%s\t.\t%s_prime_UTR\t%"PRIu32"\t%"PRIu32"\t.\t%c\t.\tParent=%s\n",itr->seq,utr->which==prime3?"three":"five",utr->beg+1,utr->end+1,tr->strand==STRAND_FWD?'+':'-',gff->tscript_ids.str[tr->id]); if ( bgzf_write(out, str.s, str.l) != str.l ) error("Error writing %s: %s\n", fname, 
strerror(errno)); } regitr_destroy(itr); @@ -940,7 +954,7 @@ static int gff_dump(gff_t *gff, const char *fname) gf_exon_t *exon = regitr_payload(itr,gf_exon_t*); gf_tscript_t *tr = exon->tr; str.l = 0; - ksprintf(&str,"%s\t.\texon\t%d\t%d\t.\t%c\t.\tParent=%s\n",itr->seq,exon->beg+1,exon->end+1,tr->strand==STRAND_FWD?'+':'-',gff->tscript_ids.str[tr->id]); + ksprintf(&str,"%s\t.\texon\t%"PRIu32"\t%"PRIu32"\t.\t%c\t.\tParent=%s\n",itr->seq,exon->beg+1,exon->end+1,tr->strand==STRAND_FWD?'+':'-',gff->tscript_ids.str[tr->id]); if ( bgzf_write(out, str.s, str.l) != str.l ) error("Error writing %s: %s\n", fname, strerror(errno)); } regitr_destroy(itr); @@ -1004,7 +1018,7 @@ int gff_parse(gff_t *gff) else if ( ftr->type==GF_UTR5 ) register_utr(gff, ftr); else if ( ftr->type==GF_UTR3 ) register_utr(gff, ftr); else - error("something: %s\t%d\t%d\t%s\t%s\n", aux->seq[ftr->iseq],ftr->beg+1,ftr->end+1,gff->tscript_ids.str[ftr->trid],gf_type2gff_string(ftr->type)); + error("something: %s\t%"PRIu32"\t%"PRIu32"\t%s\t%s\n", aux->seq[ftr->iseq],ftr->beg+1,ftr->end+1,gff->tscript_ids.str[ftr->trid],gf_type2gff_string(ftr->type)); } tscript_init_cds(gff); diff --git a/gff.h b/gff.h index ebb64634a..725d0794f 100644 --- a/gff.h +++ b/gff.h @@ -137,23 +137,7 @@ #ifndef GFF_H__ #define GFF_H__ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "bcftools.h" -#include "regidx.h" +#include #ifndef __FUNCTION__ # define __FUNCTION__ __func__ diff --git a/gvcf.c b/gvcf.c index c7b2e77d1..137194a44 100644 --- a/gvcf.c +++ b/gvcf.c @@ -40,7 +40,7 @@ struct _gvcf_t void gvcf_update_header(gvcf_t *gvcf, bcf_hdr_t *hdr) { bcf_hdr_append(hdr,"##INFO="); - bcf_hdr_append(hdr,"##INFO="); + bcf_hdr_append(hdr,"##INFO="); } gvcf_t *gvcf_init(const char *dp_ranges) @@ -98,7 +98,6 @@ bcf1_t *gvcf_write(gvcf_t *gvcf, htsFile *fh, bcf_hdr_t *hdr, bcf1_t *rec, int i // encountered, or other 
conditions not met (block broken by a non-ref or DP too low). int needs_flush = can_collapse ? 0 : 1; - // Can the record be included in a gVCF block? That is, is this a ref-only site? if ( rec && can_collapse ) { @@ -148,7 +147,7 @@ bcf1_t *gvcf_write(gvcf_t *gvcf, htsFile *fh, bcf_hdr_t *hdr, bcf1_t *rec, int i bcf_update_alleles_str(hdr, gvcf->line, gvcf->als.s); if ( gvcf->start+1 < gvcf->end ) // create gVCF record only if it spans at least two sites bcf_update_info_int32(hdr, gvcf->line, "END", &gvcf->end, 1); - bcf_update_info_int32(hdr, gvcf->line, "MinDP", &gvcf->min_dp, 1); + bcf_update_info_int32(hdr, gvcf->line, "MIN_DP", &gvcf->min_dp, 1); if ( gvcf->nqsum>0 ) bcf_update_info_float(hdr, gvcf->line, "QS", gvcf->qsum, gvcf->nqsum); if ( gvcf->ngts ) @@ -220,7 +219,7 @@ bcf1_t *gvcf_write(gvcf_t *gvcf, htsFile *fh, bcf_hdr_t *hdr, bcf1_t *rec, int i } if ( is_ref && min_dp ) - bcf_update_info_int32(hdr, rec, "MinDP", &min_dp, 1); + bcf_update_info_int32(hdr, rec, "MIN_DP", &min_dp, 1); return rec; } diff --git a/mcall.c b/mcall.c index 804ff0131..deb2f33e5 100644 --- a/mcall.c +++ b/mcall.c @@ -1558,6 +1558,11 @@ int mcall(call_t *call, bcf1_t *rec) call->nals_new = 0; for (i=0; iflag&CALL_KEEP_UNSEEN) && i==unseen && call->nals_new==1 ) + { + call->nals_new++; + call->als_new |= 1<0 && i==unseen ) continue; if ( call->flag & CALL_KEEPALT ) call->als_new |= 1<als_new & (1<nals_new++; @@ -1669,6 +1674,6 @@ int mcall(call_t *call, bcf1_t *rec) bcf_update_info_int32(call->hdr, rec, "I16", NULL, 0); // remove I16 tag - return call->nals_new; + return is_variant ? 
call->nals_new : 1; } diff --git a/plugins/split-vep.c b/plugins/split-vep.c index 82c1ff0bf..f44382d67 100644 --- a/plugins/split-vep.c +++ b/plugins/split-vep.c @@ -60,6 +60,7 @@ typedef struct { regex_t *regex; char *type; + int bcf_ht_type; } col2type_t; @@ -170,7 +171,8 @@ static const char *default_column_types(void) { return "# Default CSQ subfield types, unlisted fields are type String.\n" - "# Note the use of regular expressions.\n" + "# Note that the name search is done using regular expressions, with\n" + "# \"^\" and \"$\" appended automatically\n" "cDNA_position Integer\n" "CDS_position Integer\n" "Protein_position Integer\n" @@ -179,6 +181,7 @@ static const char *default_column_types(void) "TSL Integer\n" "GENE_PHENO Integer\n" "HGVS_OFFSET Integer\n" + ".*_POPS String\n" // e.g. MAX_AF_POPS "AF Float\n" ".*_AF Float\n" "MAX_AF_.* Float\n" @@ -344,32 +347,34 @@ static void init_column2type(args_t *args) free(str); } if ( !type || !ntype ) error("Failed to parse the column types\n"); + kstring_t tmp = {0,0,0}; for (i=0; incolumn2type++; args->column2type = (col2type_t*) realloc(args->column2type,sizeof(*args->column2type)*args->ncolumn2type); col2type_t *ct = &args->column2type[args->ncolumn2type-1]; ct->regex = (regex_t *) malloc(sizeof(regex_t)); - if ( regcomp(ct->regex, tmp, REG_NOSUB) ) - error("Error: fail to compile the column type regular expression \"%s\": %s\n", tmp,type[i]); - int type_ok = 0; - if ( !strcmp(ptr,"Float") ) type_ok = 1; - else if ( !strcmp(ptr,"Integer") ) type_ok = 1; - else if ( !strcmp(ptr,"Flag") ) type_ok = 1; - else if ( !strcmp(ptr,"String") ) type_ok = 1; - if ( !type_ok ) error("Error: the column type \"%s\" is not supported: %s\n",ptr,type[i]); + if ( regcomp(ct->regex, tmp.s, REG_NOSUB) ) + error("Error: fail to compile the column type regular expression \"%s\": %s\n", tmp.s,type[i]); + ct->bcf_ht_type = -1; + if ( !strcmp(ptr,"Float") ) ct->bcf_ht_type = BCF_HT_REAL; + else if ( !strcmp(ptr,"Integer") ) 
ct->bcf_ht_type = BCF_HT_INT; + else if ( !strcmp(ptr,"Flag") ) ct->bcf_ht_type = BCF_HT_FLAG; + else if ( !strcmp(ptr,"String") ) ct->bcf_ht_type = BCF_HT_STR; + if ( ct->bcf_ht_type==-1 ) error("Error: the column type \"%s\" is not supported: %s\n",ptr,type[i]); ct->type = strdup(ptr); - free(tmp); } + free(tmp.s); if ( !args->ncolumn2type ) error("Failed to parse the column types\n"); for (i=0; incolumn2type = 0; args->column2type = NULL; } -static const char *get_column_type(args_t *args, char *field) +static const char *get_column_type(args_t *args, char *field, int *type) { if ( !args->column2type ) init_column2type(args); int i; for (i=0; incolumn2type; i++) { int match = regexec(args->column2type[i].regex, field, 0,NULL,0) ? 0 : 1; - if ( match ) return args->column2type[i].type; + if ( match ) + { + *type = args->column2type[i].bcf_ht_type; + return args->column2type[i].type; + } } + *type = BCF_HT_STR; return "String"; } @@ -647,7 +657,20 @@ static void parse_column_str(args_t *args) ep++; } - // Now add each column to the VCF header and reconstruct the column_str in case it will be needed later + // Prune duplicates + for (i=0; inannot; i++) + { + for (j=0; jfield[column[i]],args->field[column[j]]) ) break; + if ( i==j ) continue; // unique tag, no action needed + args->nannot--; + if ( i==args->nannot ) break; // the last one is to be skipped, we are done + memmove(&column[i],&column[i+1],sizeof(*column)*(args->nannot-i)); + i--; + } + + // Now initizalize each annotation, add each column to the VCF header, and reconstruct + // the column_str in case it will be needed later free(args->column_str); kstring_t str = {0,0,0}; args->annot = (annot_t*)calloc(args->nannot,sizeof(*args->annot)); @@ -664,7 +687,7 @@ static void parse_column_str(args_t *args) else if ( ann->type==BCF_HT_INT ) type = "Integer"; else if ( ann->type==BCF_HT_FLAG ) type = "Flag"; else if ( ann->type==BCF_HT_STR ) type = "String"; - else if ( ann->type==-1 ) type = 
get_column_type(args, args->field[j]); + else if ( ann->type==-1 ) type = get_column_type(args, args->field[j], &ann->type); ksprintf(&args->kstr,"##INFO=",type); bcf_hdr_printf(args->hdr_out, args->kstr.s, ann->tag,ann->field,args->vep_tag); if ( str.l ) kputc(',',&str); diff --git a/plugins/trio-dnm2.c b/plugins/trio-dnm2.c index 7cbf7fbcd..20e4d144c 100644 --- a/plugins/trio-dnm2.c +++ b/plugins/trio-dnm2.c @@ -567,8 +567,12 @@ static void init_tprob_mprob_chrXX(args_t *args, int fi, int mi, int ci, double *denovo_allele = ca!=fa && ca!=fb && ca!=ma && ca!=mb ? ca : cb; - if ( fa!=fb ) // father cannot be heterozygous in X - *mprob = 0, *tprob = 0; + if ( fa!=fb ) + { + // this must be a genotype error, father cannot be heterozygous in X; don't flag it as a DNM unless + // also autosomal inheritance fails + init_tprob_mprob(args,fi,mi,ci,tprob,mprob,denovo_allele); + } else if ( (ca==fa && (cb==ma||cb==mb)) || (cb==fa && (ca==ma||ca==mb)) ) { if ( ma==mb ) *tprob = 1; diff --git a/read_consensus.c b/read_consensus.c index 5c8133f28..593b19b5f 100644 --- a/read_consensus.c +++ b/read_consensus.c @@ -275,7 +275,7 @@ int rcns_set_reads(read_cns_t *rcns, bam_pileup1_t *plp, int nplp) } y += len; } - else if ( op==BAM_CDEL ) + else if ( op==BAM_CDEL || op==BAM_CREF_SKIP ) /* note: unsure about BAM_CREF_SKIP, don't have data to test */ { if ( x>rcns->beg && x+len-1<=rcns->end ) { diff --git a/test/annotate19.1.out b/test/annotate19.1.out index 1b5822314..87db4898e 100644 --- a/test/annotate19.1.out +++ b/test/annotate19.1.out @@ -2,7 +2,9 @@ ##FILTER= ##contig= ##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta +##FILTER= +##FILTER= ##INFO= ##INFO= #CHROM POS ID REF ALT QUAL FILTER INFO -1 3000001 id_id;info_id C T . . ID=id_id;INFO_ID=info_id +1 3000001 id_id1;id_id2;info_id C T . 
ori_filter1;ori_filter2 ID=id_id1,id_id2;INFO_ID=info_id diff --git a/test/annotate19.2.out b/test/annotate19.2.out index f96782183..b5d8d148f 100644 --- a/test/annotate19.2.out +++ b/test/annotate19.2.out @@ -2,6 +2,8 @@ ##FILTER= ##contig= ##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta +##FILTER= +##FILTER= ##FILTER= ##INFO= ##INFO= diff --git a/test/annotate19.3.out b/test/annotate19.3.out new file mode 100644 index 000000000..0f6984b94 --- /dev/null +++ b/test/annotate19.3.out @@ -0,0 +1,10 @@ +##fileformat=VCFv4.1 +##FILTER= +##contig= +##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta +##FILTER= +##FILTER= +##INFO= +##INFO= +#CHROM POS ID REF ALT QUAL FILTER INFO +1 3000001 ori_id C T . ori_filter1;ori_filter2 FILTER=filter_filter;INFO_FILTER=info_filter diff --git a/test/annotate19.4.out b/test/annotate19.4.out new file mode 100644 index 000000000..ff3556bd7 --- /dev/null +++ b/test/annotate19.4.out @@ -0,0 +1,9 @@ +##fileformat=VCFv4.1 +##FILTER= +##contig= +##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta +##FILTER= +##FILTER= +##INFO= +#CHROM POS ID REF ALT QUAL FILTER INFO +1 3000001 ori_id C T . ori_filter1;ori_filter2 FILTER=ori_filter1,ori_filter2 diff --git a/test/annotate19.5.out b/test/annotate19.5.out new file mode 100644 index 000000000..505efa4d5 --- /dev/null +++ b/test/annotate19.5.out @@ -0,0 +1,9 @@ +##fileformat=VCFv4.1 +##FILTER= +##contig= +##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta +##FILTER= +##FILTER= +##INFO= +#CHROM POS ID REF ALT QUAL FILTER INFO +1 3000001 ori_id C T . 
ori_filter1;ori_filter2 FILTER=filter_filter diff --git a/test/annotate19.6.out b/test/annotate19.6.out new file mode 100644 index 000000000..a090a9147 --- /dev/null +++ b/test/annotate19.6.out @@ -0,0 +1,10 @@ +##fileformat=VCFv4.1 +##FILTER= +##contig= +##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta +##FILTER= +##FILTER= +##INFO= +##FILTER= +#CHROM POS ID REF ALT QUAL FILTER INFO +1 3000001 ori_id C T . filter_filter FILTER=ori_filter1,ori_filter2 diff --git a/test/annotate19.7.out b/test/annotate19.7.out new file mode 100644 index 000000000..6b1106500 --- /dev/null +++ b/test/annotate19.7.out @@ -0,0 +1,10 @@ +##fileformat=VCFv4.1 +##FILTER= +##contig= +##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta +##FILTER= +##FILTER= +##FILTER= +##INFO= +#CHROM POS ID REF ALT QUAL FILTER INFO +1 3000001 ori_id C T . filter_filter FILTER=filter_filter diff --git a/test/annotate19.dst.vcf b/test/annotate19.dst.vcf index 9781bbe6b..8c3a9c769 100644 --- a/test/annotate19.dst.vcf +++ b/test/annotate19.dst.vcf @@ -1,5 +1,7 @@ ##fileformat=VCFv4.1 ##contig= ##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta +##FILTER= +##FILTER= #CHROM POS ID REF ALT QUAL FILTER INFO -1 3000001 ori_id C T . . . +1 3000001 ori_id C T . ori_filter1;ori_filter2 . 
diff --git a/test/annotate19.src.vcf b/test/annotate19.src.vcf index 6d5b577be..ab368cbfa 100644 --- a/test/annotate19.src.vcf +++ b/test/annotate19.src.vcf @@ -6,4 +6,4 @@ ##INFO= ##FILTER= #CHROM POS ID REF ALT QUAL FILTER INFO -1 3000001 id_id C T 11 filter_filter FILTER=info_filter;ID=info_id;QUAL=99 +1 3000001 id_id1;id_id2 C T 11 filter_filter FILTER=info_filter;ID=info_id;QUAL=99 diff --git a/test/annots-mark.1.out b/test/annots-mark.1.out new file mode 100644 index 000000000..c77f64aea --- /dev/null +++ b/test/annots-mark.1.out @@ -0,0 +1,16 @@ +##fileformat=VCFv4.3 +##FILTER= +##contig= +##FORMAT= +##INFO= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT test +chr1 10611 . C G . . . GT 1/1 +chr1 10623 . T C . . TAG GT 1/1 +chr1 10647 . A G . . TAG GT 0/1 +chr1 10671 . G C . . TAG GT 0/1 +chr1 10676 . A G . . . GT 0/1 +chr1 10705 . A G . . . GT 0/1 +chr1 10729 . G C . . . GT 0/1 +chr1 10734 . A G . . . GT 0/1 +chr1 10751 . C G . . . GT 0/1 +chr1 10758 . G C . . . GT 0/1 diff --git a/test/annots-mark.bed b/test/annots-mark.bed new file mode 100644 index 000000000..74932b859 --- /dev/null +++ b/test/annots-mark.bed @@ -0,0 +1 @@ +chr1 10611 10671 \ No newline at end of file diff --git a/test/annots-mark.vcf b/test/annots-mark.vcf new file mode 100644 index 000000000..8db832714 --- /dev/null +++ b/test/annots-mark.vcf @@ -0,0 +1,14 @@ +##fileformat=VCFv4.3 +##contig= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT test +chr1 10611 . C G . . . GT 1/1 +chr1 10623 . T C . . . GT 1/1 +chr1 10647 . A G . . . GT 0/1 +chr1 10671 . G C . . . GT 0/1 +chr1 10676 . A G . . . GT 0/1 +chr1 10705 . A G . . . GT 0/1 +chr1 10729 . G C . . . GT 0/1 +chr1 10734 . A G . . . GT 0/1 +chr1 10751 . C G . . . GT 0/1 +chr1 10758 . G C . . . 
GT 0/1 diff --git a/test/atomize.split.1.0.out b/test/atomize.split.1.0.out new file mode 100644 index 000000000..5e36d2276 --- /dev/null +++ b/test/atomize.split.1.0.out @@ -0,0 +1,60 @@ +##fileformat=VCFv4.3 +##FILTER= +##contig= +##contig= +##FILTER= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##INFO= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT S1 S2 S3 S4 +11 101 . G C . . OLD_REC=11|101|GCGT|G,GCGA,GTGA,CCGT|4 GT 0 0 0 1 +11 101 . G * . . OLD_REC=11|101|GCGT|G,GCGA,GTGA,CCGT|4 GT 1 0 0 0 +11 101 . GCGT G . . OLD_REC=11|101|GCGT|G,GCGA,GTGA,CCGT|1 GT 1 0 0 0 +11 101 . GCGT * . . OLD_REC=11|101|GCGT|G,GCGA,GTGA,CCGT|1 GT 0 1 1 1 +11 102 . C T . . OLD_REC=11|101|GCGT|G,GCGA,GTGA,CCGT|3 GT 0 0 1 0 +11 102 . C * . . OLD_REC=11|101|GCGT|G,GCGA,GTGA,CCGT|3 GT 1 0 0 0 +11 104 . T A . . OLD_REC=11|101|GCGT|G,GCGA,GTGA,CCGT|2,3 GT 0 1 1 0 +11 104 . T * . . OLD_REC=11|101|GCGT|G,GCGA,GTGA,CCGT|2,3 GT 1 0 0 0 +11 201 . C G . . OLD_REC=11|201|CC|GG,GT|1,2 GT 0 1 1 0 +11 201 . C * . . OLD_REC=11|201|CC|GG,GT|1,2 GT 0 0 0 0 +11 202 . C G . . OLD_REC=11|201|CC|GG,GT|1 GT 0 1 0 0 +11 202 . C * . . OLD_REC=11|201|CC|GG,GT|1 GT 0 0 1 0 +11 202 . C T . . OLD_REC=11|201|CC|GG,GT|2 GT 0 0 1 0 +11 202 . C * . . 
OLD_REC=11|201|CC|GG,GT|2 GT 0 1 0 0 +12 101 rs101 G C 199 flt INDEL;AN=4;AC=4;DP=19;ISTR=SomeString;XRF=0,4;XRI=0,4;XRS=000,xx;XAF=4;XAI=444;XAS=zzz;OLD_REC=12|101|GCGT|G,GCGA,GTGA,CCGT|4 GT 0/0 0/0 0/0 0/1 +12 101 rs101 G * 199 flt INDEL;AN=4;AC=.;DP=19;ISTR=SomeString;XRF=0,.;XRI=0,.;XRS=000,.;XAF=.;XAI=.;XAS=.;OLD_REC=12|101|GCGT|G,GCGA,GTGA,CCGT|4 GT 0/1 1/0 0/0 0/0 +12 101 rs101 GCGT G 199 flt INDEL;AN=4;AC=1;DP=19;ISTR=SomeString;XRF=0,10;XRI=0,1111;XRS=000,AAA;XAF=10;XAI=1111;XAS=AAA;OLD_REC=12|101|GCGT|G,GCGA,GTGA,CCGT|1 GT 0/1 1/0 0/0 0/0 +12 101 rs101 GCGT * 199 flt INDEL;AN=4;AC=.;DP=19;ISTR=SomeString;XRF=0,.;XRI=0,.;XRS=000,.;XAF=.;XAI=.;XAS=.;OLD_REC=12|101|GCGT|G,GCGA,GTGA,CCGT|1 GT 0/0 0/1 1/1 1/1 +12 102 rs101 C T 199 flt INDEL;AN=4;AC=3;DP=19;ISTR=SomeString;XRF=0,300000;XRI=0,3333;XRS=000,DDD;XAF=3;XAI=33;XAS=xx;OLD_REC=12|101|GCGT|G,GCGA,GTGA,CCGT|3 GT 0/0 0/0 0/1 0/0 +12 102 rs101 C * 199 flt INDEL;AN=4;AC=.;DP=19;ISTR=SomeString;XRF=0,.;XRI=0,.;XRS=000,.;XAF=.;XAI=.;XAS=.;OLD_REC=12|101|GCGT|G,GCGA,GTGA,CCGT|3 GT 0/1 1/0 0/0 0/0 +12 104 rs101 T A 199 flt INDEL;AN=4;AC=2;DP=19;ISTR=SomeString;XRF=0,200;XRI=0,2222;XRS=000,BBB;XAF=200000;XAI=22;XAS=DDD;OLD_REC=12|101|GCGT|G,GCGA,GTGA,CCGT|2,3 GT 0/0 0/1 1/1 1/0 +12 104 rs101 T * 199 flt INDEL;AN=4;AC=.;DP=19;ISTR=SomeString;XRF=0,.;XRI=0,.;XRS=000,.;XAF=.;XAI=.;XAS=.;OLD_REC=12|101|GCGT|G,GCGA,GTGA,CCGT|2,3 GT 0/1 1/0 0/0 0/0 +12 201 . C G . . OLD_REC=12|201|CC|GG,GT|1,2 GT:FSTR:FFI:FFF:FAF:FAI:FAS:FRF:FRI:FRS:PL 0/0:xx:0:0:1.1:1:a:0,1.1:0,1:a,b:0,1,2 1:yy:11:1.1:1.1:1:a:0,1.1:0,1:a,b:0,2 1/1:zz:22:2.2:1.1:1:a:0,1.1:0,1:a,b:0,1,2 0:.:.:.:.:.:.:.,1.1:.,.:.,.:. +12 201 . C * . . OLD_REC=12|201|CC|GG,GT|1,2 GT:FSTR:FFI:FFF:FAF:FAI:FAS:FRF:FRI:FRS:PL 0/0:xx:0:0:.:.:.:0,.:0,.:a,.:0,.,. 0:yy:11:1.1:.:.:.:0,.:0,.:a,.:0,. 0/0:zz:22:2.2:.:.:.:0,.:0,.:a,.:0,.,. 0:.:.:.:.:.:.:.,1.1:.,.:.,.:. +12 202 . C G . . 
OLD_REC=12|201|CC|GG,GT|1 GT:FSTR:FFI:FFF:FAF:FAI:FAS:FRF:FRI:FRS:PL 0/0:xx:0:0:1.1:1:a:0,1.1:0,1:a,b:0,1,2 1:yy:11:1.1:1.1:1:a:0,1.1:0,1:a,b:0,2 0/0:zz:22:2.2:1.1:1:a:0,1.1:0,1:a,b:0,1,2 0:.:.:.:.:.:.:.,1.1:.,.:.,.:. +12 202 . C * . . OLD_REC=12|201|CC|GG,GT|1 GT:FSTR:FFI:FFF:FAF:FAI:FAS:FRF:FRI:FRS:PL 0/0:xx:0:0:.:.:.:0,.:0,.:a,.:0,.,. 0:yy:11:1.1:.:.:.:0,.:0,.:a,.:0,. 1/1:zz:22:2.2:.:.:.:0,.:0,.:a,.:0,.,. 0:.:.:.:.:.:.:.,1.1:.,.:.,.:. +12 202 . C T . . OLD_REC=12|201|CC|GG,GT|2 GT:FSTR:FFI:FFF:FAF:FAI:FAS:FRF:FRI:FRS:PL 0/0:xx:0:0:2.2:2:b:0,2.2:0,2:a,c:0,3,5 0:yy:11:1.1:2.2:2:b:0,2.2:0,2:a,c:0,5 1/1:zz:22:2.2:2.2:2:b:0,2.2:0,2:a,c:0,3,5 0:.:.:.:.:.:.:.,2.2:.,.:.,.:. +12 202 . C * . . OLD_REC=12|201|CC|GG,GT|2 GT:FSTR:FFI:FFF:FAF:FAI:FAS:FRF:FRI:FRS:PL 0/0:xx:0:0:.:.:.:0,.:0,.:a,.:0,.,. 1:yy:11:1.1:.:.:.:0,.:0,.:a,.:0,. 0/0:zz:22:2.2:.:.:.:0,.:0,.:a,.:0,.,. 0:.:.:.:.:.:.:.,2.2:.,.:.,.:. diff --git a/test/filter.13.vcf b/test/filter.13.vcf new file mode 100644 index 000000000..4b05b6064 --- /dev/null +++ b/test/filter.13.vcf @@ -0,0 +1,6 @@ +##fileformat=VCFv4.1 +##reference=file:///seq/references/1000Genomes-NCBI37.fasta +##contig= +#CHROM POS ID REF ALT QUAL FILTER INFO +11 2343543 . N . . . . +11 5464562 . C T 9 . . 
diff --git a/test/gtcheck.1.out b/test/gtcheck.1.out index 03b684e31..365093647 100644 --- a/test/gtcheck.1.out +++ b/test/gtcheck.1.out @@ -1 +1 @@ -DC s1 s1 0 2.371900e+01 2 +DCv2 s1 s1 0 3.465736e-01 2 2 diff --git a/test/gtcheck.10.out b/test/gtcheck.10.out index 0862fe5ae..3038dd4ec 100644 --- a/test/gtcheck.10.out +++ b/test/gtcheck.10.out @@ -1 +1 @@ -DC s1 s1 4.002001e-03 2.371900e+01 2 +DCv2 s1 s1 0.000000e+00 3.465736e-01 2 2 diff --git a/test/gtcheck.11.out b/test/gtcheck.11.out index aa3fbe393..3038dd4ec 100644 --- a/test/gtcheck.11.out +++ b/test/gtcheck.11.out @@ -1 +1 @@ -DC s1 s1 0.000000e+00 2.371900e+01 2 +DCv2 s1 s1 0.000000e+00 3.465736e-01 2 2 diff --git a/test/gtcheck.12.out b/test/gtcheck.12.out index 451d85862..0ae8b70d8 100644 --- a/test/gtcheck.12.out +++ b/test/gtcheck.12.out @@ -1,10 +1,10 @@ -DC B A 5.733631e-01 2.253795e+00 2 -DC C A 4.938053e+00 8.675006e-01 2 -DC C B 2.791391e+00 8.675006e-01 2 -DC D A 5.022610e+00 0.000000e+00 2 -DC D B 5.178533e+00 0.000000e+00 2 -DC D C 4.938053e+00 0.000000e+00 2 -DC E A 7.325195e+00 0.000000e+00 2 -DC E B 5.178533e+00 0.000000e+00 2 -DC E C 2.635468e+00 1.386294e+00 2 -DC E D 2.720025e+00 2.407946e+00 2 +DCv2 B A 5.733631e-01 1.126897e+00 2 2 +DCv2 C A 4.938053e+00 4.337503e-01 2 2 +DCv2 C B 2.791391e+00 4.337503e-01 2 2 +DCv2 D A 5.022610e+00 0.000000e+00 2 2 +DCv2 D B 5.178533e+00 0.000000e+00 2 2 +DCv2 D C 4.938053e+00 0.000000e+00 2 2 +DCv2 E A 7.325195e+00 0.000000e+00 2 2 +DCv2 E B 5.178533e+00 0.000000e+00 2 2 +DCv2 E C 2.635468e+00 6.931472e-01 2 2 +DCv2 E D 2.720025e+00 3.566749e-01 2 2 diff --git a/test/gtcheck.2.out b/test/gtcheck.2.out index 1aee3f8b4..5a0b74e69 100644 --- a/test/gtcheck.2.out +++ b/test/gtcheck.2.out @@ -1 +1 @@ -DC s1 s1 0 6.931472e-01 1 +DCv2 s1 s1 0 6.931472e-01 1 1 diff --git a/test/gtcheck.3.1.out b/test/gtcheck.3.1.out new file mode 100644 index 000000000..8475d2416 --- /dev/null +++ b/test/gtcheck.3.1.out @@ -0,0 +1,3 @@ +DCv2 A D 2.302585e+00 0.000000e+00 1 
1 +DCv2 A E 4.605170e+00 0.000000e+00 1 1 +DCv2 D E 2.302585e+00 0.000000e+00 1 1 diff --git a/test/gtcheck.3.out b/test/gtcheck.3.out index 3cba9cc21..ca16e843f 100644 --- a/test/gtcheck.3.out +++ b/test/gtcheck.3.out @@ -1,10 +1,10 @@ -DC B A 0 2.253795e+00 2 -DC C A 1 8.675006e-01 2 -DC C B 1 8.675006e-01 2 -DC D A 2 0.000000e+00 2 -DC D B 2 0.000000e+00 2 -DC D C 2 0.000000e+00 2 -DC E A 2 0.000000e+00 2 -DC E B 2 0.000000e+00 2 -DC E C 1 1.386294e+00 2 -DC E D 1 2.407946e+00 2 +DCv2 B A 0 1.126897e+00 2 2 +DCv2 C A 1 8.675006e-01 2 1 +DCv2 C B 1 8.675006e-01 2 1 +DCv2 D A 2 0.000000e+00 2 0 +DCv2 D B 2 0.000000e+00 2 0 +DCv2 D C 2 0.000000e+00 2 0 +DCv2 E A 2 0.000000e+00 2 0 +DCv2 E B 2 0.000000e+00 2 0 +DCv2 E C 1 1.386294e+00 2 1 +DCv2 E D 1 7.133499e-01 2 1 diff --git a/test/gtcheck.4.out b/test/gtcheck.4.out index d9d526b9d..5dcac63b3 100644 --- a/test/gtcheck.4.out +++ b/test/gtcheck.4.out @@ -1,3 +1,3 @@ -DC D C 2 0.000000e+00 2 -DC E C 1 1.386294e+00 2 -DC E D 1 2.407946e+00 2 +DCv2 D C 2 0.000000e+00 2 0 +DCv2 E C 1 1.386294e+00 2 1 +DCv2 E D 1 7.133499e-01 2 1 diff --git a/test/gtcheck.5.1.out b/test/gtcheck.5.1.out index 639730377..ffc594a2b 100644 --- a/test/gtcheck.5.1.out +++ b/test/gtcheck.5.1.out @@ -5,8 +5,9 @@ INFO sites-skipped-monoallelic 1 INFO sites-skipped-no-data 1 INFO sites-skipped-GT-not-diploid 1 INFO sites-skipped-PL-not-diploid 1 +INFO sites-skipped-filtering-expression 0 INFO sites-used-PL-vs-PL 0 INFO sites-used-PL-vs-GT 1 INFO sites-used-GT-vs-PL 0 INFO sites-used-GT-vs-GT 1 -DC A A 3.000150e-04 4.605170e+01 2 +DCv2 A A 1.000089e-12 0.000000e+00 2 2 diff --git a/test/gtcheck.5.out b/test/gtcheck.5.out index 88b89ed6e..3e873e3d0 100644 --- a/test/gtcheck.5.out +++ b/test/gtcheck.5.out @@ -1 +1 @@ -DC B D 2 0.000000e+00 2 +DCv2 B D 2 0.000000e+00 2 0 diff --git a/test/gtcheck.6.1.out b/test/gtcheck.6.1.out new file mode 100644 index 000000000..dabf26c69 --- /dev/null +++ b/test/gtcheck.6.1.out @@ -0,0 +1,2 @@ +DCv2 A B 
0.000000e+00 5.753641e-01 1 1 +DCv2 B C 9.210340e+00 0.000000e+00 1 1 diff --git a/test/gtcheck.6.out b/test/gtcheck.6.out index bb2d90999..076b0d95a 100644 --- a/test/gtcheck.6.out +++ b/test/gtcheck.6.out @@ -1,2 +1,2 @@ -DC B C 1 8.675006e-01 2 -DC B D 2 0.000000e+00 2 +DCv2 B C 1 8.675006e-01 2 1 +DCv2 B D 2 0.000000e+00 2 0 diff --git a/test/gtcheck.6.vcf b/test/gtcheck.6.vcf new file mode 100644 index 000000000..184080a39 --- /dev/null +++ b/test/gtcheck.6.vcf @@ -0,0 +1,6 @@ +##fileformat=VCFv4.3 +##reference=ref.fa +##contig= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B C D +1 1 . C T . PASS . GT 0/0 0/0 0/1 0/1 diff --git a/test/gtcheck.7.out b/test/gtcheck.7.out index 27f5c5f63..b0ee77f8e 100644 --- a/test/gtcheck.7.out +++ b/test/gtcheck.7.out @@ -1 +1 @@ -DC s1 s1 0 2.302585e+01 1 +DCv2 s1 s1 0 0.000000e+00 1 1 diff --git a/test/gtcheck.8.out b/test/gtcheck.8.out index ef120c68d..125155339 100644 --- a/test/gtcheck.8.out +++ b/test/gtcheck.8.out @@ -1,6 +1,6 @@ -DC A B 3 1.734223e+01 9 -DC C D 6 1.075056e+01 9 -DC E F 9 0.000000e+00 9 +DCv2 A B 3 5.877867e-01 9 6 +DCv2 C D 6 3.646431e-01 9 3 +DCv2 E F 9 0.000000e+00 9 0 DS 1 3 3 0 DS 2 3 3 1 DS 3 3 3 2 diff --git a/test/gtcheck.9.out b/test/gtcheck.9.out index 3cba9cc21..ca16e843f 100644 --- a/test/gtcheck.9.out +++ b/test/gtcheck.9.out @@ -1,10 +1,10 @@ -DC B A 0 2.253795e+00 2 -DC C A 1 8.675006e-01 2 -DC C B 1 8.675006e-01 2 -DC D A 2 0.000000e+00 2 -DC D B 2 0.000000e+00 2 -DC D C 2 0.000000e+00 2 -DC E A 2 0.000000e+00 2 -DC E B 2 0.000000e+00 2 -DC E C 1 1.386294e+00 2 -DC E D 1 2.407946e+00 2 +DCv2 B A 0 1.126897e+00 2 2 +DCv2 C A 1 8.675006e-01 2 1 +DCv2 C B 1 8.675006e-01 2 1 +DCv2 D A 2 0.000000e+00 2 0 +DCv2 D B 2 0.000000e+00 2 0 +DCv2 D C 2 0.000000e+00 2 0 +DCv2 E A 2 0.000000e+00 2 0 +DCv2 E B 2 0.000000e+00 2 0 +DCv2 E C 1 1.386294e+00 2 1 +DCv2 E D 1 7.133499e-01 2 1 diff --git a/test/gtcheck.ntop.1.out b/test/gtcheck.ntop.1.out index 2092aa6b6..414a95963 100644 --- 
a/test/gtcheck.ntop.1.out +++ b/test/gtcheck.ntop.1.out @@ -1,6 +1,6 @@ -DC smpl x1 4.951814e+01 2.197225e+00 6 -DC smpl x2 9.904588e+00 1.075056e+01 6 -DC smpl x3 1.000050e-03 1.075056e+01 5 -DC smpl x4 2.971136e+01 7.613325e+00 6 -DC smpl x5 1.200060e-03 1.258314e+01 6 -DC smpl x6 3.961475e+01 5.416100e+00 6 +DCv2 smpl x1 9.210340e+01 1.351550e-01 6 6 +DCv2 smpl x2 1.842068e+01 7.931820e-01 6 6 +DCv2 smpl x3 0.000000e+00 9.518185e-01 5 5 +DCv2 smpl x4 5.526204e+01 3.662041e-01 6 6 +DCv2 smpl x5 0.000000e+00 9.634573e-01 6 6 +DCv2 smpl x6 7.368272e+01 2.310491e-01 6 6 diff --git a/test/gtcheck.ntop.2.out b/test/gtcheck.ntop.2.out index 43843ea7c..a1c3e1f0d 100644 --- a/test/gtcheck.ntop.2.out +++ b/test/gtcheck.ntop.2.out @@ -1,2 +1,2 @@ -DC smpl x5 1.200060e-03 1.258314e+01 6 -DC smpl x3 1.000050e-03 1.075056e+01 5 +DCv2 smpl x3 0.000000e+00 9.518185e-01 5 5 +DCv2 smpl x5 0.000000e+00 9.634573e-01 6 6 diff --git a/test/head.1.out b/test/head.1.out new file mode 100644 index 000000000..7472c9b08 --- /dev/null +++ b/test/head.1.out @@ -0,0 +1 @@ +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample1 sample2 diff --git a/test/head.2.out b/test/head.2.out new file mode 100644 index 000000000..3460114fa --- /dev/null +++ b/test/head.2.out @@ -0,0 +1,2 @@ +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample1 sample2 +chr1 212740 . A G,<*> 0 . DP=73;I16=0,0,39,4,0,0,2743,192525,0,0,2580,154800,0,0,825,18621;QS=0,2,0;VDB=0.520868;SGB=-1.38232;MQSB=1;MQ0F=0 PL:DP:AD 255,72,0,255,72,255:24:0,24,0 255,57,0,255,57,255:19:0,19,0 diff --git a/test/head.3.out b/test/head.3.out new file mode 100644 index 000000000..8ece77e86 --- /dev/null +++ b/test/head.3.out @@ -0,0 +1,5 @@ +##fileformat=VCFv4.2 +##FILTER= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample1 sample2 +chr1 212740 . A G,<*> 0 . 
DP=73;I16=0,0,39,4,0,0,2743,192525,0,0,2580,154800,0,0,825,18621;QS=0,2,0;VDB=0.520868;SGB=-1.38232;MQSB=1;MQ0F=0 PL:DP:AD 255,72,0,255,72,255:24:0,24,0 255,57,0,255,57,255:19:0,19,0 +chr1 320055 . A <*> 0 . DP=101;I16=52,9,0,0,4116,300666,0,0,3660,219600,0,0,1281,29849,0,0;QS=2,0;MQSB=1;MQ0F=0 PL:DP:AD 0,87,255:29:29,0 0,96,255:32:32,0 diff --git a/test/merge.gvcf.2.1.out b/test/merge.gvcf.2.1.out new file mode 100644 index 000000000..b368d065d --- /dev/null +++ b/test/merge.gvcf.2.1.out @@ -0,0 +1,52 @@ +##fileformat=VCFv4.2 +##FILTER= +##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##INFO= +##FORMAT= +##FORMAT= +##FORMAT= +##INFO= +##INFO= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT AAA BBB CCC +2 21444416 . G <*> . . END=21444427;MinDP=5;QS=1,0 PL:DP 0,15,125:5 .:. .:. +2 21444428 . C <*> . . END=21444429;MinDP=2;QS=2,0 PL:DP 0,15,125:5 0,6,51:2 .:. +2 21444430 . TCAA T,TAA 0 . MinDP=2;QS=1.60366,0.304878,0.0914634 PL:DP:DV 37,0,79,35,73,113:5:2 0,51,51,51,51,51:2:. .:.:. +2 21444431 . CA C,CAAACAAAAAA 0 . QS=0.75,0.25,1 PL:DP:DV 0,4,10,10,10,10:4:1 28,28,28,3,28,0:1:1 .:.:. +2 21444431 . C <*> . . MinDP=4;QS=1,0 PL:DP 0,12,110:4 .:. .:. +2 21444433 . C <*> 0 . END=21444444;QS=0.75,0.25 PL:DP:DV 0,4,10:4:1 .:.:. .:.:. +3 1 . C <*> 0 . END=4;MinDP=33;QS=0.75,0.25 PL:DP:DV 0,4,10:4:1 .:.:. .:.:. +3 5 . C T 0 . MinDP=33;QS=1.5,0.25 PL:DP:DV 0,10,10:4:1 0,4,10:4:1 .:.:. +3 6 . N <*> 0 . END=10;MinDP=33;QS=0.75,0.25 PL:DP:DV 0,4,10:4:1 .:.:. .:.:. +1 1619670 . C <*> 0 . END=1619782;MinDP=33;QS=0.75,0.25 PL:DP:DV 0,4,10:4:1 .:.:. .:.:. +1 1619783 . C <*> 0 . END=1619787;MinDP=33;QS=0.75,1.25 PL:DP:DV 0,4,10:4:1 28,3,0:1:1 .:.:. +1 1619788 . G GAAAAAAA 0 . MinDP=33;QS=0.75,1 PL:DP:DV 0,10,10:4:1 28,3,0:1:1 .:.:. +1 1619789 . N <*> 0 . END=1619877;MinDP=33;QS=0.75,0.25 PL:DP:DV 0,4,10:4:1 .:.:. .:.:. +4 20000975 . C <*> 0 . 
END=20001021;MinDP=33;QS=0.75,0.25 PL:DP:DV 0,4,10:4:1 .:.:. .:.:. +4 20001022 . C <*> 0 . END=20001070;MinDP=33;QS=1.5,0.5 PL:DP:DV 0,4,10:4:1 0,4,10:4:1 .:.:. +4 20001071 . T G 0 . QS=0.75,0.25 PL:DP:DV 0,4,10:4:1 0,4,10:4:1 .:.:. +5 110285 . T C . . . PL . 114,0,15 . +5 110285 . TAACCCC T . . . PL 89,6,0 . . +5 1110285 . T C . . . PL . 114,0,15 . +5 1110285 . T TAACCCC . . . PL 89,6,0 . . +6 600 . T <*> . . END=609 PL 66,1,1 . . +6 610 . T <*> . . . PL 66,1,1 66,2,1 . +6 611 . N <*> . . END=619 PL 66,1,1 . . +6 620 . T <*> . . END=625 PL 66,1,1 66,2,2 . +6 626 . N <*> . . END=629 PL 66,1,1 . . +6 630 . T <*> . . . PL 66,1,1 66,2,3 . +6 631 . N <*> . . END=666 PL 66,1,1 . . +7 701 . T <*> . . . PL 77,1,1 77,2,1 . +7 702 . T <*> . . . PL 77,1,1 77,2,2 . +7 703 . T <*> . . . PL 77,1,2 77,2,2 . +7 704 . N <*> . . END=777 PL 77,1,2 . . +8 1 . T <*> . . END=2 PL 88,1,1 . . +8 3 . T A . . . PL 88,1,1 88,2,1 88,3,1 +8 4 . N <*> . . END=10 PL 88,1,1 . . diff --git a/test/merge.gvcf.2.2.out b/test/merge.gvcf.2.2.out new file mode 100644 index 000000000..674dd55d9 --- /dev/null +++ b/test/merge.gvcf.2.2.out @@ -0,0 +1,52 @@ +##fileformat=VCFv4.2 +##FILTER= +##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##INFO= +##FORMAT= +##FORMAT= +##FORMAT= +##INFO= +##INFO= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT AAA BBB CCC +2 21444416 . G . . . END=21444427;MinDP=5;QS=1 PL:DP 0:5 .:. .:. +2 21444428 . C . . . END=21444429;MinDP=2;QS=2 PL:DP 0:5 0:2 .:. +2 21444430 . TCAA T,TAA 0 . MinDP=2;QS=1.60366,0.304878,0.0914634 PL:DP:DV 37,0,79,35,73,113:5:2 0,51,51,51,51,51:2:. .:.:. +2 21444431 . CA C,CAAACAAAAAA 0 . QS=0.75,0.25,1 PL:DP:DV 0,4,10,10,10,10:4:1 28,28,28,3,28,0:1:1 .:.:. +2 21444431 . C . . . MinDP=4;QS=1 PL:DP 0:4 .:. .:. +2 21444433 . C . 0 . END=21444444;QS=0.75 PL:DP:DV 0:4:1 .:.:. .:.:. +3 1 . C . 0 . 
END=4;MinDP=33;QS=0.75 PL:DP:DV 0:4:1 .:.:. .:.:. +3 5 . C T 0 . MinDP=33;QS=1.5,0.25 PL:DP:DV 0,10,10:4:1 0,4,10:4:1 .:.:. +3 6 . N . 0 . END=10;MinDP=33;QS=0.75 PL:DP:DV 0:4:1 .:.:. .:.:. +1 1619670 . C . 0 . END=1619782;MinDP=33;QS=0.75 PL:DP:DV 0:4:1 .:.:. .:.:. +1 1619783 . C . 0 . END=1619787;MinDP=33;QS=0.75 PL:DP:DV 0:4:1 28:1:1 .:.:. +1 1619788 . G GAAAAAAA 0 . MinDP=33;QS=0.75,1 PL:DP:DV 0,10,10:4:1 28,3,0:1:1 .:.:. +1 1619789 . N . 0 . END=1619877;MinDP=33;QS=0.75 PL:DP:DV 0:4:1 .:.:. .:.:. +4 20000975 . C . 0 . END=20001021;MinDP=33;QS=0.75 PL:DP:DV 0:4:1 .:.:. .:.:. +4 20001022 . C . 0 . END=20001070;MinDP=33;QS=1.5 PL:DP:DV 0:4:1 0:4:1 .:.:. +4 20001071 . T G 0 . QS=0.75,0.25 PL:DP:DV 0,4,10:4:1 0,4,10:4:1 .:.:. +5 110285 . T C . . . PL . 114,0,15 . +5 110285 . TAACCCC T . . . PL 89,6,0 . . +5 1110285 . T C . . . PL . 114,0,15 . +5 1110285 . T TAACCCC . . . PL 89,6,0 . . +6 600 . T . . . END=609 PL 66 . . +6 610 . T . . . . PL 66 66 . +6 611 . N . . . END=619 PL 66 . . +6 620 . T . . . END=625 PL 66 66 . +6 626 . N . . . END=629 PL 66 . . +6 630 . T . . . . PL 66 66 . +6 631 . N . . . END=666 PL 66 . . +7 701 . T . . . . PL 77 77 . +7 702 . T . . . . PL 77 77 . +7 703 . T . . . . PL 77 77 . +7 704 . N . . . END=777 PL 77 . . +8 1 . T . . . END=2 PL 88 . . +8 3 . T A . . . PL 88,1,1 88,2,1 88,3,1 +8 4 . N . . . END=10 PL 88 . . diff --git a/test/merge.gvcf.2.a.1.out b/test/merge.gvcf.2.a.1.out new file mode 100644 index 000000000..0cd0e9a27 --- /dev/null +++ b/test/merge.gvcf.2.a.1.out @@ -0,0 +1,15 @@ +2 21444416 . G <*> . . END=21444429;MinDP=5;QS=1,0 PL:DP 0,15,125:5 +2 21444430 . TCAA T,TAA 0 . QS=0.603659,0.304878,0.0914634 PL:DP:DV 37,0,79,35,73,113:5:2 +2 21444431 . C <*> . . MinDP=4;QS=1,0 PL:DP 0,12,110:4 +2 21444431 . CA C 0 . QS=0.75,0.25 PL:DP:DV 0,4,10:4:1 +2 21444433 . C <*> 0 . END=21444444;QS=0.75,0.25 PL:DP:DV 0,4,10:4:1 +3 1 . C <*> 0 . END=10;MinDP=33;QS=0.75,0.25 PL:DP:DV 0,4,10:4:1 +1 1619670 . C <*> 0 . 
END=1619877;MinDP=33;QS=0.75,0.25 PL:DP:DV 0,4,10:4:1 +4 20000975 . C <*> 0 . END=20001070;MinDP=33;QS=0.75,0.25 PL:DP:DV 0,4,10:4:1 +4 20001071 . T G 0 . . PL:DP:DV 0,4,10:4:1 +5 110285 . TAACCCC T . . . PL 89,6,0 +5 1110285 . T TAACCCC . . . PL 89,6,0 +6 600 . T <*> . . END=666 PL 66,1,1 +7 701 . T <*> . . END=702 PL 77,1,1 +7 703 . T <*> . . END=777 PL 77,1,2 +8 1 . T <*> . . END=10 PL 88,1,1 diff --git a/test/merge.gvcf.2.a.2.out b/test/merge.gvcf.2.a.2.out new file mode 100644 index 000000000..255ed1086 --- /dev/null +++ b/test/merge.gvcf.2.a.2.out @@ -0,0 +1,15 @@ +2 21444416 . G . . . END=21444429;MinDP=5;QS=1 PL:DP 0:5 +2 21444430 . TCAA T,TAA 0 . QS=0.603659,0.304878,0.0914634 PL:DP:DV 37,0,79,35,73,113:5:2 +2 21444431 . C . . . MinDP=4;QS=1 PL:DP 0:4 +2 21444431 . CA C 0 . QS=0.75,0.25 PL:DP:DV 0,4,10:4:1 +2 21444433 . C . 0 . END=21444444;QS=0.75 PL:DP:DV 0:4:1 +3 1 . C . 0 . END=10;MinDP=33;QS=0.75 PL:DP:DV 0:4:1 +1 1619670 . C . 0 . END=1619877;MinDP=33;QS=0.75 PL:DP:DV 0:4:1 +4 20000975 . C . 0 . END=20001070;MinDP=33;QS=0.75 PL:DP:DV 0:4:1 +4 20001071 . T G 0 . . PL:DP:DV 0,4,10:4:1 +5 110285 . TAACCCC T . . . PL 89,6,0 +5 1110285 . T TAACCCC . . . PL 89,6,0 +6 600 . T . . . END=666 PL 66 +7 701 . T . . . END=702 PL 77 +7 703 . T . . . END=777 PL 77 +8 1 . T . . . END=10 PL 88 diff --git a/test/mpileup.2.out b/test/mpileup.2.out index 43e022fd4..c449d2689 100644 --- a/test/mpileup.2.out +++ b/test/mpileup.2.out @@ -20,33 +20,33 @@ ##FORMAT= ##FORMAT= ##INFO= -##INFO= +##INFO= ##FORMAT= ##INFO= ##INFO= ##INFO= ##INFO= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00100 HG00101 HG00102 -17 1 . A . . . END=301;MinDP=1 GT:DP ./.:5 ./.:1 ./.:3 +17 1 . A . . . END=301;MIN_DP=1 GT:DP ./.:5 ./.:1 ./.:3 17 302 . T TA 487.586 . INDEL;IDV=7;IMF=1;DP=25;VDB=0.27613;SGB=-4.22417;MQSB=0.0443614;MQ0F=0;AC=4;AN=6;DP4=2,4,8,11;MQ=49 GT:PL:DP:DV 0/1:167,0,96:11:6 0/1:157,0,9:7:6 1/1:201,21,0:7:7 -17 303 . G . . . END=827;MinDP=2 GT:DP 0/0:9 0/0:2 0/0:3 +17 303 . G . 
. . END=827;MIN_DP=2 GT:DP 0/0:9 0/0:2 0/0:3 17 828 . T C 409.29 . DP=25;VDB=0.842082;SGB=-4.20907;RPB=0.950652;MQB=1;MQSB=1;BQB=0.929717;MQ0F=0;AC=4;AN=6;DP4=2,4,8,11;MQ=60 GT:PL:DP:DV 0/1:211,0,35:12:10 0/1:116,0,91:9:5 1/1:120,12,0:4:4 -17 829 . T . . . END=833;MinDP=4 GT:DP 0/0:11 0/0:8 0/0:4 +17 829 . T . . . END=833;MIN_DP=4 GT:DP 0/0:11 0/0:8 0/0:4 17 834 . G A 363.72 . DP=25;VDB=0.788006;SGB=-4.01214;RPB=0.999233;MQB=1;MQSB=1;BQB=0.821668;MQ0F=0;AC=4;AN=6;DP4=2,3,7,10;MQ=60 GT:PL:DP:DV 0/1:185,0,46:11:9 0/1:128,0,59:8:5 1/1:89,9,0:3:3 -17 835 . T . . . END=1664;MinDP=1 GT:DP 0/0:5 0/0:2 0/0:1 +17 835 . T . . . END=1664;MIN_DP=1 GT:DP 0/0:5 0/0:2 0/0:1 17 1665 . T C 3.10665 . DP=20;VDB=0.1;SGB=0.346553;RPB=0.222222;MQB=0.611111;MQSB=0.988166;BQB=0.944444;MQ0F=0;AC=1;AN=6;DP4=7,11,1,1;MQ=55 GT:PL:DP:DV 0/0:0,21,185:7:0 0/0:0,27,222:9:0 0/1:35,0,51:4:2 -17 1666 . G . . . END=1868;MinDP=0 GT:DP 0/0:6 0/0:0 0/0:1 +17 1666 . G . . . END=1868;MIN_DP=0 GT:DP 0/0:6 0/0:0 0/0:1 17 1869 . A T 138.104 . DP=24;VDB=0.928022;SGB=-11.9537;RPB=0.984127;MQB=0.96464;MQSB=0.931547;BQB=0.359155;MQ0F=0;AC=4;AN=6;DP4=6,9,5,4;MQ=58 GT:PL:DP:DV 0/1:115,0,224:18:7 0/1:16,0,104:5:1 1/1:42,3,0:1:1 -17 1870 . C . . . END=2040;MinDP=1 GT:DP 0/0:13 0/0:2 0/0:1 +17 1870 . C . . . END=2040;MIN_DP=1 GT:DP 0/0:13 0/0:2 0/0:1 17 2041 . G A 447.444 . DP=31;VDB=0.816435;SGB=-4.18892;RPB=0.88473;MQB=0.972375;MQSB=0.968257;BQB=0.311275;MQ0F=0;AC=4;AN=6;DP4=6,5,12,7;MQ=58 GT:PL:DP:DV 0/1:229,0,212:21:11 0/1:32,0,24:2:1 1/1:223,21,0:7:7 -17 2042 . G . . . END=2219;MinDP=1 GT:DP 0/0:8 0/0:1 0/0:3 +17 2042 . G . . . END=2219;MIN_DP=1 GT:DP 0/0:8 0/0:1 0/0:3 17 2220 . G A 302.575 . DP=21;VDB=0.532753;SGB=-3.51597;RPB=0.964198;MQB=0.898397;MQSB=0.875769;BQB=0.0354359;MQ0F=0;AC=4;AN=6;DP4=6,2,1,11;MQ=58 GT:PL:DP:DV 0/1:139,0,130:12:6 0/1:69,0,46:4:2 1/1:131,12,0:4:4 -17 2221 . G . . . END=2563;MinDP=0 GT:DP 0/0:5 0/0:0 0/0:2 +17 2221 . G . . . END=2563;MIN_DP=0 GT:DP 0/0:5 0/0:0 0/0:2 17 2564 . 
A G 232.697 . DP=15;VDB=0.690812;SGB=-3.20711;RPB=0.197899;MQB=1;MQSB=1;BQB=0.965069;MQ0F=0;AC=4;AN=6;DP4=1,4,4,5;MQ=60 GT:PL:DP:DV 0/1:88,0,78:6:3 0/1:57,0,56:4:2 1/1:124,12,0:4:4 -17 2565 . A . . . END=3103;MinDP=0 GT:DP 0/0:6 0/0:0 0/0:1 +17 2565 . A . . . END=3103;MIN_DP=0 GT:DP 0/0:6 0/0:0 0/0:1 17 3104 . C T 24.2837 . DP=25;VDB=0.8;SGB=0.346553;RPB=0.717391;MQB=0.956522;MQSB=0.962269;BQB=0.978261;MQ0F=0;AC=1;AN=6;DP4=8,15,2,0;MQ=58 GT:PL:DP:DV 0/0:0,48,255:16:0 0/0:0,12,144:4:0 0/1:59,0,93:5:2 -17 3105 . T . . . END=3586;MinDP=2 GT:DP 0/0:5 0/0:2 0/0:3 +17 3105 . T . . . END=3586;MIN_DP=2 GT:DP 0/0:5 0/0:2 0/0:3 17 3587 . G A 357.834 . DP=29;VDB=0.902044;SGB=-3.91326;RPB=0.800999;MQB=1;MQSB=1;BQB=0.156944;MQ0F=0;AC=4;AN=6;DP4=4,7,10,6;MQ=60 GT:PL:DP:DV 0/1:161,0,184:14:7 0/1:22,0,118:5:1 1/1:212,24,0:8:8 -17 3588 . A . . . END=3935;MinDP=2 GT:DP 0/0:10 0/0:2 0/0:3 +17 3588 . A . . . END=3935;MIN_DP=2 GT:DP 0/0:10 0/0:2 0/0:3 17 3936 . A G 469.356 . DP=37;VDB=0.0574114;SGB=-4.60123;RPB=0.741697;MQB=0.812605;MQSB=0.143788;BQB=0.883831;MQ0F=0;AC=4;AN=6;DP4=5,6,6,17;MQ=56 GT:PL:DP:DV 0/1:233,0,206:20:11 0/1:77,0,58:6:4 1/1:196,24,0:8:8 -17 3937 . C . . . END=4101;MinDP=0 GT:DP 0/0:1 0/0:0 0/0:0 +17 3937 . C . . . END=4101;MIN_DP=0 GT:DP 0/0:1 0/0:0 0/0:0 diff --git a/test/mpileup/mpileup.6.out b/test/mpileup/mpileup.6.out index 5bb6f8220..127da35b0 100644 --- a/test/mpileup/mpileup.6.out +++ b/test/mpileup/mpileup.6.out @@ -20,100 +20,100 @@ ##FORMAT= ##FORMAT= ##INFO= -##INFO= +##INFO= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00100 HG00101 HG00102 -17 100 . C <*> . . END=102;MinDP=3;QS=3,0 PL:DP 0,27,182:9 0,9,108:3 0,15,132:5 +17 100 . C <*> . . END=102;MIN_DP=3;QS=3,0 PL:DP 0,27,182:9 0,9,108:3 0,15,132:5 17 103 . T C,<*> 0 . 
DP=18;I16=15,1,1,0,668,28542,6,36,929,54841,29,841,323,7035,6,36;QS=2.98256,0.0174419,0;SGB=-0.556633;RPBZ=-1.02125;MQBZ=-2.73861;MQSBZ=0.365148;BQBZ=-1.64924;SCBZ=4;MQ0F=0 PL:DP:DV 0,17,184,24,187,185:9:1 0,9,118,9,118,118:3:0 0,15,147,15,147,147:5:0 17 104 . G C,<*> 0 . DP=18;I16=15,1,1,0,601,24163,3,9,929,54841,29,841,320,6884,7,49;QS=2.98726,0.0127389,0;SGB=-0.556633;RPBZ=-1.02125;MQBZ=-2.73861;MQSBZ=0.365148;BQBZ=-1.64309;SCBZ=4;MQ0F=0 PL:DP:DV 0,18,173,24,176,173:9:1 0,9,101,9,101,101:3:0 0,15,133,15,133,133:5:0 -17 105 . G <*> . . END=108;MinDP=3;QS=3,0 PL:DP 0,30,171:10 0,9,92:3 0,15,124:5 +17 105 . G <*> . . END=108;MIN_DP=3;QS=3,0 PL:DP 0,30,171:10 0,9,92:3 0,15,124:5 17 109 . T C,<*> 0 . DP=19;I16=16,1,1,0,716,30648,2,4,989,58441,29,841,309,6415,12,144;QS=2.98922,0.0107817,0;SGB=-0.556633;RPBZ=-0.674966;MQBZ=-2.82843;MQSBZ=0.353553;BQBZ=-1.6661;SCBZ=3;MQ0F=0 PL:DP:DV 0,20,191,27,194,191:10:1 0,9,120,9,120,120:3:0 0,15,150,15,150,150:5:0 -17 110 . G <*> . . END=111;MinDP=3;QS=3,0 PL:DP 0,30,167:10 0,9,95:3 0,15,119:5 +17 110 . G <*> . . END=111;MIN_DP=3;QS=3,0 PL:DP 0,30,167:10 0,9,95:3 0,15,119:5 17 112 . C G,<*> 0 . DP=19;I16=16,1,1,0,657,26565,2,4,989,58441,29,841,301,6277,15,225;QS=2.98907,0.010929,0;SGB=-0.556633;RPBZ=-0.482118;MQBZ=-2.82843;MQSBZ=0.353553;BQBZ=-1.65292;SCBZ=3;MQ0F=0 PL:DP:DV 0,20,187,27,190,187:10:1 0,9,103,9,103,103:3:0 0,15,135,15,135,135:5:0 17 113 . A G,<*> 0 . DP=19;I16=16,1,1,0,635,25055,4,16,989,58441,29,841,297,6207,16,256;QS=2.98817,0.0118343,0;SGB=-0.556633;RPBZ=-0.482118;MQBZ=-2.82843;MQSBZ=0.353553;BQBZ=-1.65205;SCBZ=3;MQ0F=0 PL:DP:DV 0,20,172,27,175,172:10:1 0,9,102,9,102,102:3:0 0,15,139,15,139,139:5:0 -17 114 . C <*> . . MinDP=3;QS=3,0 PL:DP 0,30,181:10 0,9,113:3 0,15,133:5 +17 114 . C <*> . . MIN_DP=3;QS=3,0 PL:DP 0,30,181:10 0,9,113:3 0,15,133:5 17 115 . C A,<*> 0 . 
DP=21;I16=17,2,1,0,688,27426,12,144,1078,62882,29,841,283,5827,25,625;QS=2.88785,0.11215,0;SGB=-0.556633;RPBZ=0.260329;MQBZ=-2.38048;MQSBZ=-1.42419;BQBZ=-1.47798;SCBZ=-0.418446;MQ0F=0 PL:DP:DV 0,30,189,30,189,189:10:0 3,0,86,9,89,93:3:1 0,21,153,21,153,153:7:0 17 116 . A C,<*> 0 . DP=21;I16=17,2,1,0,705,27791,2,4,1078,62882,29,841,287,6073,19,361;QS=2.98873,0.0112676,0;SGB=-0.556633;RPBZ=0.0867763;MQBZ=-2.38048;MQSBZ=-1.42419;BQBZ=-1.65814;SCBZ=2.65016;MQ0F=0 PL:DP:DV 0,20,179,27,182,179:10:1 0,9,102,9,102,102:3:0 0,21,175,21,175,175:7:0 -17 117 . G <*> . . END=119;MinDP=2;QS=3,0 PL:DP 0,27,175:9 0,6,76:2 0,21,160:7 +17 117 . G <*> . . END=119;MIN_DP=2;QS=3,0 PL:DP 0,27,175:9 0,6,76:2 0,21,160:7 17 120 . A G,<*> 0 . DP=19;I16=15,2,1,0,646,25392,4,16,958,55682,29,841,277,5999,23,529;QS=2.9873,0.0126984,0;SGB=-0.556633;RPBZ=0.28942;MQBZ=-2.23607;MQSBZ=-1.30384;BQBZ=-1.6486;SCBZ=3;MQ0F=0 PL:DP:DV 0,18,171,24,174,171:9:1 0,6,88,6,88,88:2:0 0,21,171,21,171,171:7:0 -17 121 . G <*> . . END=124;MinDP=2;QS=3,0 PL:DP 0,24,170:8 0,6,84:2 0,18,154:6 +17 121 . G <*> . . END=124;MIN_DP=2;QS=3,0 PL:DP 0,24,170:8 0,6,84:2 0,18,154:6 17 125 . A T,<*> 0 . DP=18;I16=14,2,1,0,589,22565,4,16,898,52082,29,841,272,5916,25,625;QS=2.98444,0.0155642,0;SGB=-0.556633;RPBZ=1.02125;MQBZ=-2.16025;MQSBZ=-1.23956;BQBZ=-1.64106;SCBZ=2.91548;MQ0F=0 PL:DP:DV 0,15,149,21,152,149:8:1 0,9,114,9,114,114:3:0 0,18,162,18,162,162:6:0 -17 126 . A <*> . . END=158;MinDP=2;QS=3,0 PL:DP 0,18,128:6 0,6,81:2 0,15,102:5 +17 126 . A <*> . . END=158;MIN_DP=2;QS=3,0 PL:DP 0,18,128:6 0,6,81:2 0,15,102:5 17 159 . A G,<*> 0 . DP=15;I16=12,2,1,0,519,19467,5,25,809,47641,60,3600,260,5880,0,0;QS=2.97076,0.0292398,0;SGB=-0.556633;RPBZ=-1.62019;MQBZ=0.267261;MQSBZ=-2.54951;BQBZ=-1.63041;SCBZ=0;MQ0F=0 PL:DP:DV 0,21,157,21,157,157:7:0 0,6,83,6,83,83:2:0 0,10,129,15,132,129:6:1 -17 160 . G <*> . . END=173;MinDP=2;QS=3,0 PL:DP 0,15,106:5 0,6,75:2 0,15,138:5 -17 174 . G <*> . . 
END=188;MinDP=1;QS=3,0 PL:DP 0,15,111:5 0,3,27:1 0,21,116:7 -17 189 . C <*> . . MinDP=2;QS=3,0 PL:DP 0,18,152:6 0,6,67:2 0,21,167:7 +17 160 . G <*> . . END=173;MIN_DP=2;QS=3,0 PL:DP 0,15,106:5 0,6,75:2 0,15,138:5 +17 174 . G <*> . . END=188;MIN_DP=1;QS=3,0 PL:DP 0,15,111:5 0,3,27:1 0,21,116:7 +17 189 . C <*> . . MIN_DP=2;QS=3,0 PL:DP 0,18,152:6 0,6,67:2 0,21,167:7 17 190 . A C,<*> 0 . DP=15;I16=12,2,1,0,500,18230,5,25,778,44882,60,3600,243,5381,25,625;QS=2.97664,0.0233645,0;SGB=-0.556633;RPBZ=0;MQBZ=0.392232;MQSBZ=-3.74166;BQBZ=-1.63041;SCBZ=0;MQ0F=0 PL:DP:DV 0,18,138,18,138,138:6:0 0,6,68,6,68,68:2:0 0,12,153,18,156,153:7:1 -17 191 . T <*> . . END=222;MinDP=2;QS=3,0 PL:DP 0,18,140:6 0,6,63:2 0,12,73:4 +17 191 . T <*> . . END=222;MIN_DP=2;QS=3,0 PL:DP 0,18,140:6 0,6,63:2 0,12,73:4 17 223 . G T,<*> 0 . DP=13;I16=9,3,1,0,412,14782,8,64,627,34923,60,3600,243,5657,25,625;QS=2.96596,0.0340426,0;SGB=-0.556633;RPBZ=1.07052;MQBZ=0.547723;MQSBZ=-3.4641;BQBZ=-1.60577;SCBZ=0;MQ0F=0 PL:DP:DV 0,11,166,18,169,167:7:1 0,6,53,6,53,53:2:0 0,12,81,12,81,81:4:0 -17 224 . G <*> . . END=225;MinDP=2;QS=3,0 PL:DP 0,18,165:6 0,6,48:2 0,12,70:4 +17 224 . G <*> . . END=225;MIN_DP=2;QS=3,0 PL:DP 0,18,165:6 0,6,48:2 0,12,70:4 17 226 . A G,C,<*> 0 . DP=13;I16=8,3,1,1,381,13669,6,18,567,31323,89,4441,248,5894,44,986;QS=2.94293,0.0392157,0.0178571,0;VDB=0.84;SGB=-2.62246;RPBZ=-0.592157;MQBZ=-0.615457;MQSBZ=-3.4641;BQBZ=-2.17723;SCBZ=2.34521;MQ0F=0 PL:DP:DV 0,18,159,12,162,159,18,159,162,159:7:1 0,6,53,6,53,53,6,53,53,53:2:0 0,5,79,9,82,80,9,82,80,80:4:1 -17 227 . C <*> . . END=236;MinDP=2;QS=3,0 PL:DP 0,21,145:7 0,6,45:2 0,12,70:4 +17 227 . C <*> . . END=236;MIN_DP=2;QS=3,0 PL:DP 0,21,145:7 0,6,45:2 0,12,70:4 17 237 . A C,<*> 0 . DP=14;I16=9,4,1,0,465,16877,8,64,656,35764,60,3600,266,6282,25,625;QS=2.93162,0.0683761,0;SGB=-0.556633;RPBZ=1.11754;MQBZ=0.632456;MQSBZ=-3.60555;BQBZ=-1.61959;SCBZ=-0.27735;MQ0F=0 PL:DP:DV 0,24,206,24,206,206:8:0 0,6,53,6,53,53:2:0 0,3,84,9,87,87:4:1 -17 238 . C <*> . 
. END=255;MinDP=2;QS=3,0 PL:DP 0,24,211:8 0,6,50:2 0,12,64:4 -17 256 . A <*> . . END=266;MinDP=5;QS=3,0 PL:DP 0,33,247:11 0,15,92:5 0,15,122:5 +17 238 . C <*> . . END=255;MIN_DP=2;QS=3,0 PL:DP 0,24,211:8 0,6,50:2 0,12,64:4 +17 256 . A <*> . . END=266;MIN_DP=5;QS=3,0 PL:DP 0,33,247:11 0,15,92:5 0,15,122:5 17 267 . T G,<*> 0 . DP=21;I16=9,11,0,1,739,27465,8,64,960,50456,29,841,373,7935,25,625;QS=2.94203,0.057971,0;SGB=-0.556633;RPBZ=-0.330396;MQBZ=-1.23153;MQSBZ=-3.3021;BQBZ=-1.66282;SCBZ=2.91633;MQ0F=0 PL:DP:DV 0,33,254,33,254,254:11:0 0,6,93,12,96,96:5:1 0,15,149,15,149,149:5:0 -17 268 . T <*> . . END=272;MinDP=5;QS=3,0 PL:DP 0,33,238:11 0,15,91:5 0,15,143:5 +17 268 . T <*> . . END=272;MIN_DP=5;QS=3,0 PL:DP 0,33,238:11 0,15,91:5 0,15,143:5 17 273 . T C,<*> 0 . DP=22;I16=9,12,0,1,798,30664,5,25,989,51297,29,841,392,8620,25,625;QS=2.96094,0.0390625,0;SGB=-0.556633;RPBZ=-0.236567;MQBZ=-1.16701;MQSBZ=-3.42286;BQBZ=-1.66779;SCBZ=3.00076;MQ0F=0 PL:DP:DV 0,36,255,36,255,255:12:0 0,7,89,12,92,90:5:1 0,15,161,15,161,161:5:0 -17 274 . C <*> . . MinDP=5;QS=3,0 PL:DP 0,36,255:12 0,15,101:5 0,15,144:5 -17 275 . C <*> . . END=277;MinDP=4;QS=3,0 PL:DP 0,33,253:11 0,15,114:5 0,12,121:4 +17 274 . C <*> . . MIN_DP=5;QS=3,0 PL:DP 0,36,255:12 0,15,101:5 0,15,144:5 +17 275 . C <*> . . END=277;MIN_DP=4;QS=3,0 PL:DP 0,33,253:11 0,15,114:5 0,12,121:4 17 278 . A C,<*> 0 . DP=21;I16=6,14,1,0,722,26452,7,49,867,42179,60,3600,415,9521,11,121;QS=2.97935,0.020649,0;SGB=-0.556633;RPBZ=1.65198;MQBZ=1.0247;MQSBZ=-3.24037;BQBZ=-1.66667;SCBZ=-0.324037;MQ0F=0 PL:DP:DV 0,22,231,30,234,231:11:1 0,18,123,18,123,123:6:0 0,12,121,12,121,121:4:0 -17 279 . A <*> . . END=282;MinDP=4;QS=3,0 PL:DP 0,36,253:12 0,18,122:6 0,12,119:4 -17 283 . C <*> . . END=296;MinDP=5;QS=3,0 PL:DP 0,33,240:11 0,18,119:6 0,15,122:5 +17 279 . A <*> . . END=282;MIN_DP=4;QS=3,0 PL:DP 0,36,253:12 0,18,122:6 0,12,119:4 +17 283 . C <*> . . END=296;MIN_DP=5;QS=3,0 PL:DP 0,33,240:11 0,18,119:6 0,15,122:5 17 297 . C G,<*> 0 . 
DP=25;I16=9,15,1,0,901,34305,4,16,1138,59338,60,3600,445,9901,10,100;QS=2.98261,0.0173913,0;SGB=-0.556633;RPBZ=-1.24856;MQBZ=0.806872;MQSBZ=-3.22749;BQBZ=-1.67542;SCBZ=-0.368383;MQ0F=0 PL:DP:DV 0,33,255,33,255,255:11:0 0,15,168,21,171,168:8:1 0,18,161,18,161,161:6:0 -17 298 . A <*> . . END=301;MinDP=7;QS=3,0 PL:DP 0,30,231:10 0,21,172:7 0,21,189:7 +17 298 . A <*> . . END=301;MIN_DP=7;QS=3,0 PL:DP 0,30,231:10 0,21,172:7 0,21,189:7 17 302 . T TA 0 . INDEL;IDV=7;IMF=1;DP=25;I16=2,4,8,11,240,9600,760,30400,236,10564,993,55133,109,2229,377,8629;QS=0.539485,2.46052;VDB=0.241622;SGB=-4.22417;RPBZ=1.11989;MQBZ=1.47646;MQSBZ=-3.22749;BQBZ=-1.67542;SCBZ=-0.268121;MQ0F=0 PL:DP:DV 161,0,99:11:6 158,0,14:7:6 201,21,0:7:7 -17 303 . G <*> . . END=334;MinDP=7;QS=3,0 PL:DP 0,30,235:10 0,21,197:7 0,21,195:7 +17 303 . G <*> . . END=334;MIN_DP=7;QS=3,0 PL:DP 0,30,235:10 0,21,197:7 0,21,195:7 17 335 . A G,<*> 0 . DP=32;I16=13,18,1,0,1084,40336,4,16,1589,87297,60,3600,555,11943,0,0;QS=2.98919,0.0108108,0;SGB=-0.556633;RPBZ=-1.67936;MQBZ=0.622171;MQSBZ=-2.25492;BQBZ=-1.68602;SCBZ=-0.258065;MQ0F=0 PL:DP:DV 0,33,252,33,252,252:11:0 0,27,219,27,219,219:9:0 0,25,245,33,248,245:12:1 -17 336 . A <*> . . MinDP=9;QS=3,0 PL:DP 0,33,255:11 0,27,212:9 0,36,255:12 +17 336 . A <*> . . MIN_DP=9;QS=3,0 PL:DP 0,33,255:11 0,27,212:9 0,36,255:12 17 337 . C A,<*> 0 . DP=32;I16=14,17,0,1,1125,42481,5,25,1612,89528,37,1369,536,11590,18,324;QS=2.98113,0.0188679,0;SGB=-0.556633;RPBZ=0.812593;MQBZ=-1.03695;MQSBZ=-2.25492;BQBZ=-1.68353;SCBZ=-0.258065;MQ0F=0 PL:DP:DV 0,33,255,33,255,255:11:0 0,17,195,24,198,195:9:1 0,36,255,36,255,255:12:0 -17 338 . T <*> . . END=354;MinDP=8;QS=3,0 PL:DP 0,27,225:9 0,24,185:8 0,30,255:10 +17 338 . T <*> . . END=354;MIN_DP=8;QS=3,0 PL:DP 0,27,225:9 0,24,185:8 0,30,255:10 17 355 . G T,<*> 0 . 
DP=28;I16=14,13,0,1,1001,37907,41,1681,1442,81174,60,3600,547,12487,25,625;QS=2.875,0.125,0;SGB=-0.556633;RPBZ=0.185772;MQBZ=0.520126;MQSBZ=-1.7696;BQBZ=0.683978;SCBZ=-0.27735;MQ0F=0 PL:DP:DV 14,0,200,38,203,231:9:1 0,27,222,27,222,222:9:0 0,30,255,30,255,255:10:0 -17 356 . G <*> . . END=358;MinDP=8;QS=3,0 PL:DP 0,27,228:9 0,24,197:8 0,30,252:10 +17 356 . G <*> . . END=358;MIN_DP=8;QS=3,0 PL:DP 0,27,228:9 0,24,197:8 0,30,252:10 17 359 . G T,<*> 0 . DP=29;I16=15,13,0,1,1085,42761,10,100,1525,87005,60,3600,552,12620,25,625;QS=2.96032,0.0396825,0;SGB=-0.556633;RPBZ=-0.119611;MQBZ=0.456435;MQSBZ=-1.53333;BQBZ=-1.68246;SCBZ=5.2915;MQ0F=0 PL:DP:DV 0,30,255,30,255,255:10:0 0,13,178,21,181,180:8:1 0,33,255,33,255,255:11:0 -17 360 . A <*> . . END=368;MinDP=8;QS=3,0 PL:DP 0,30,252:10 0,24,204:8 0,30,255:10 +17 360 . A <*> . . END=368;MIN_DP=8;QS=3,0 PL:DP 0,30,252:10 0,24,204:8 0,30,255:10 17 369 . T G,<*> 0 . DP=28;I16=16,11,0,1,1037,40275,6,36,1496,86164,60,3600,548,12256,25,625;QS=2.97683,0.023166,0;SGB=-0.556633;RPBZ=0.12395;MQBZ=0.408248;MQSBZ=-1.37784;BQBZ=-1.68703;SCBZ=5.19615;MQ0F=0 PL:DP:DV 0,30,250,30,250,250:10:0 0,15,189,21,192,190:8:1 0,30,255,30,255,255:10:0 -17 370 . C <*> . . END=374;MinDP=8;QS=3,0 PL:DP 0,30,255:10 0,24,201:8 0,30,255:10 +17 370 . C <*> . . END=374;MIN_DP=8;QS=3,0 PL:DP 0,30,255:10 0,24,201:8 0,30,255:10 17 375 . A T,<*> 0 . DP=31;I16=17,13,0,1,1138,43798,14,196,1676,96964,60,3600,547,12177,4,16;QS=2.9661,0.0338983,0;SGB=-0.556633;RPBZ=-1.45432;MQBZ=0.3849;MQSBZ=-1.26404;BQBZ=-1.68488;SCBZ=0;MQ0F=0 PL:DP:DV 0,36,255,36,255,255:12:0 0,24,218,24,218,218:8:0 0,18,255,30,255,255:11:1 -17 376 . G <*> . . END=383;MinDP=7;QS=3,0 PL:DP 0,36,255:12 0,21,172:7 0,30,255:10 +17 376 . G <*> . . END=383;MIN_DP=7;QS=3,0 PL:DP 0,36,255:12 0,21,172:7 0,30,255:10 17 384 . A C,<*> 0 . 
DP=31;I16=19,11,0,1,1077,39885,4,16,1738,102482,60,3600,504,10988,25,625;QS=2.98419,0.0158103,0;SGB=-0.556633;RPBZ=0.615229;MQBZ=0.262613;MQSBZ=-0.333409;BQBZ=-1.68746;SCBZ=0;MQ0F=0 PL:DP:DV 0,39,255,39,255,255:13:0 0,15,171,21,174,171:8:1 0,30,255,30,255,255:10:0 -17 385 . C <*> . . END=400;MinDP=5;QS=3,0 PL:DP 0,36,255:12 0,15,133:5 0,30,255:10 +17 385 . C <*> . . END=400;MIN_DP=5;QS=3,0 PL:DP 0,36,255:12 0,15,133:5 0,30,255:10 17 401 . A C,<*> 0 . DP=29;I16=17,11,0,1,1063,41001,8,64,1587,92523,60,3600,501,11113,25,625;QS=2.97985,0.0201511,0;SGB=-0.556633;RPBZ=-0.478622;MQBZ=0.339683;MQSBZ=0.29364;BQBZ=-1.68267;SCBZ=0;MQ0F=0 PL:DP:DV 0,39,255,39,255,255:13:0 0,15,160,15,160,160:5:0 0,22,255,30,255,255:11:1 -17 402 . T <*> . . END=404;MinDP=5;QS=3,0 PL:DP 0,39,255:13 0,15,131:5 0,33,255:11 -17 405 . T <*> . . END=411;MinDP=4;QS=3,0 PL:DP 0,39,255:13 0,12,109:4 0,30,244:10 +17 402 . T <*> . . END=404;MIN_DP=5;QS=3,0 PL:DP 0,39,255:13 0,15,131:5 0,33,255:11 +17 405 . T <*> . . END=411;MIN_DP=4;QS=3,0 PL:DP 0,39,255:13 0,12,109:4 0,30,244:10 17 412 . C T,<*> 0 . DP=30;I16=17,12,1,0,1094,42458,14,196,1678,98882,60,3600,495,10659,25,625;QS=2.97455,0.0254545,0;SGB=-0.556633;RPBZ=-0.40473;MQBZ=0.267261;MQSBZ=-0.293785;BQBZ=-1.6942;SCBZ=0;MQ0F=0 PL:DP:DV 0,30,255,42,255,255:15:1 0,12,124,12,124,124:4:0 0,33,255,33,255,255:11:0 -17 413 . A <*> . . END=436;MinDP=3;QS=3,0 PL:DP 0,45,255:15 0,9,98:3 0,27,223:9 +17 413 . A <*> . . END=436;MIN_DP=3;QS=3,0 PL:DP 0,45,255:15 0,9,98:3 0,27,223:9 17 437 . T G,<*> 0 . DP=28;I16=13,14,1,0,990,36832,6,36,1558,91682,29,841,549,12435,16,256;QS=2.9781,0.0218978,0;SGB=-0.556633;RPBZ=-1.36214;MQBZ=-2.88675;MQSBZ=0.6;BQBZ=-1.67909;SCBZ=0;MQ0F=0 PL:DP:DV 0,48,255,48,255,255:16:0 0,9,109,9,109,109:3:0 0,17,200,24,203,201:9:1 -17 438 . A <*> . . END=464;MinDP=2;QS=3,0 PL:DP 0,48,255:16 0,6,97:2 0,27,198:9 +17 438 . A <*> . . END=464;MIN_DP=2;QS=3,0 PL:DP 0,48,255:16 0,6,97:2 0,27,198:9 17 465 . C T,<*> 0 . 
DP=33;I16=19,12,0,1,1173,45601,4,16,1775,103851,60,3600,589,12909,6,36;QS=2.98734,0.0126582,0;SGB=-0.556633;RPBZ=1.67982;MQBZ=0.321288;MQSBZ=0.341466;BQBZ=-1.68493;SCBZ=0;MQ0F=0 PL:DP:DV 0,51,255,51,255,255:17:0 0,15,158,15,158,158:5:0 0,20,224,27,227,224:10:1 -17 466 . A <*> . . END=470;MinDP=5;QS=3,0 PL:DP 0,54,255:18 0,15,165:5 0,30,238:10 +17 466 . A <*> . . END=470;MIN_DP=5;QS=3,0 PL:DP 0,54,255:18 0,15,165:5 0,30,238:10 17 471 . T G,C,<*> 0 . DP=36;I16=21,12,0,2,1220,46380,18,162,1918,113282,97,4969,611,13281,16,256;QS=2.94529,0.0273556,0.0273556,0;VDB=0.96;SGB=0.346553;RPBZ=0.497712;MQBZ=-1.9761;MQSBZ=0.312094;BQBZ=-2.35032;SCBZ=0;MQ0F=0 PL:DP:DV 0,57,255,57,255,255,57,255,255,255:19:0 0,15,169,15,169,169,15,169,169,169:5:0 0,19,219,19,221,219,27,222,222,221:11:2 -17 472 . T <*> . . END=487;MinDP=5;QS=3,0 PL:DP 0,51,255:17 0,15,146:5 0,30,241:10 +17 472 . T <*> . . END=487;MIN_DP=5;QS=3,0 PL:DP 0,51,255:17 0,15,146:5 0,30,241:10 17 488 . A G,<*> 0 . DP=35;I16=22,12,1,0,1278,48412,4,16,1986,117410,29,841,646,14380,25,625;QS=2.98947,0.0105263,0;SGB=-0.556633;RPBZ=0.594463;MQBZ=-3.36505;MQSBZ=0.10737;BQBZ=-1.69552;SCBZ=0;MQ0F=0 PL:DP:DV 0,54,255,54,255,255:18:0 0,18,177,18,177,177:6:0 0,23,255,30,255,255:11:1 -17 489 . A <*> . . END=511;MinDP=6;QS=3,0 PL:DP 0,51,255:17 0,18,175:6 0,24,221:8 +17 489 . A <*> . . END=511;MIN_DP=6;QS=3,0 PL:DP 0,51,255:17 0,18,175:6 0,24,221:8 17 512 . A C,<*> 0 . DP=33;I16=22,10,0,1,1133,41513,13,169,1866,110210,60,3600,628,14340,9,81;QS=2.97766,0.0223368,0;SGB=-0.556633;RPBZ=-1.47079;MQBZ=0.253876;MQSBZ=-0.461593;BQBZ=-1.68442;SCBZ=0;MQ0F=0 PL:DP:DV 0,39,255,51,255,255:18:1 0,21,183,21,183,183:7:0 0,24,231,24,231,231:8:0 17 513 . A T,<*> 0 . DP=32;I16=21,10,1,0,1125,42283,12,144,1806,106610,60,3600,623,14249,15,225;QS=2.97966,0.020339,0;SGB=-0.556633;RPBZ=-1.24609;MQBZ=0.258065;MQSBZ=-0.532795;BQBZ=-1.57376;SCBZ=0;MQ0F=0 PL:DP:DV 0,37,255,48,255,255:17:1 0,21,177,21,177,177:7:0 0,24,233,24,233,233:8:0 17 514 . A T,<*> 0 . 
DP=32;I16=22,9,0,1,1086,40204,16,256,1806,106610,60,3600,627,14381,11,121;QS=2.97127,0.0287253,0;SGB=-0.556633;RPBZ=-1.4628;MQBZ=0.258065;MQSBZ=-0.532795;BQBZ=-1.46657;SCBZ=0;MQ0F=0 PL:DP:DV 0,34,255,48,255,255:17:1 0,21,172,21,172,172:7:0 0,24,235,24,235,235:8:0 -17 515 . C <*> . . END=522;MinDP=5;QS=3,0 PL:DP 0,51,255:17 0,21,169:7 0,15,170:5 +17 515 . C <*> . . END=522;MIN_DP=5;QS=3,0 PL:DP 0,51,255:17 0,21,169:7 0,15,170:5 17 523 . T G,<*> 0 . DP=32;I16=23,8,1,0,1184,45708,15,225,1837,109369,60,3600,626,14446,25,625;QS=2.9794,0.0206044,0;SGB=-0.556633;RPBZ=1.13742;MQBZ=0.179605;MQSBZ=-1.73205;BQBZ=-1.68805;SCBZ=0;MQ0F=0 PL:DP:DV 0,44,255,57,255,255:20:1 0,21,191,21,191,191:7:0 0,15,166,15,166,166:5:0 -17 524 . T <*> . . END=534;MinDP=5;QS=3,0 PL:DP 0,54,255:18 0,21,172:7 0,15,133:5 +17 524 . T <*> . . END=534;MIN_DP=5;QS=3,0 PL:DP 0,54,255:18 0,21,172:7 0,15,133:5 17 535 . A G,<*> 0 . DP=31;I16=24,6,0,1,1080,39870,8,64,1777,105769,60,3600,611,13341,25,625;QS=2.98623,0.0137694,0;SGB=-0.556633;RPBZ=-0.894788;MQBZ=0.182574;MQSBZ=-1.85164;BQBZ=-1.6854;SCBZ=0;MQ0F=0 PL:DP:DV 0,41,255,51,255,255:18:1 0,21,194,21,194,194:7:0 0,18,189,18,189,189:6:0 -17 536 . C <*> . . END=547;MinDP=6;QS=3,0 PL:DP 0,54,255:18 0,21,166:7 0,18,157:6 +17 536 . C <*> . . END=547;MIN_DP=6;QS=3,0 PL:DP 0,54,255:18 0,21,166:7 0,18,157:6 17 548 . A C,<*> 0 . DP=33;I16=23,9,1,0,1153,42673,9,81,1866,110210,60,3600,561,12489,3,9;QS=2.98607,0.0139319,0;SGB=-0.556633;RPBZ=1.57584;MQBZ=0.253876;MQSBZ=-2.34521;BQBZ=-1.68939;SCBZ=3.80814;MQ0F=0 PL:DP:DV 0,44,255,54,255,255:19:1 0,21,181,21,181,181:7:0 0,21,211,21,211,211:7:0 17 549 . T G,<*> 0 . DP=32;I16=23,8,0,1,1135,42143,20,400,1806,106610,60,3600,532,11720,25,625;QS=2.96918,0.0308166,0;SGB=-0.556633;RPBZ=-0.487556;MQBZ=0.258065;MQSBZ=-2.29695;BQBZ=-1.68602;SCBZ=-0.258065;MQ0F=0 PL:DP:DV 0,34,255,51,255,255:18:1 0,21,168,21,168,168:7:0 0,21,208,21,208,208:7:0 -17 550 . T <*> . . 
END=558;MinDP=6;QS=3,0 PL:DP 0,45,255:15 0,18,143:6 0,18,177:6 +17 550 . T <*> . . END=558;MIN_DP=6;QS=3,0 PL:DP 0,45,255:15 0,18,143:6 0,18,177:6 17 559 . C A,<*> 0 . DP=27;I16=18,8,0,1,915,33775,14,196,1560,93600,29,841,473,10141,25,625;QS=2.92708,0.0729167,0;SGB=-0.556633;RPBZ=-1.02726;MQBZ=-5.09902;MQSBZ=-1.41421;BQBZ=-1.54776;SCBZ=5.09902;MQ0F=0 PL:DP:DV 0,45,255,45,255,255:15:0 0,4,116,15,119,123:6:1 0,18,169,18,169,169:6:0 -17 560 . C <*> . . END=565;MinDP=5;QS=3,0 PL:DP 0,45,255:15 0,15,121:5 0,21,154:7 +17 560 . C <*> . . END=565;MIN_DP=5;QS=3,0 PL:DP 0,45,255:15 0,15,121:5 0,21,154:7 17 566 . C A,<*> 0 . DP=29;I16=16,12,1,0,920,33998,9,81,1649,98041,60,3600,454,9734,25,625;QS=2.98321,0.016791,0;SGB=-0.556633;RPBZ=0.239193;MQBZ=0.188982;MQSBZ=-1.19024;BQBZ=-1.43942;SCBZ=-0.188982;MQ0F=0 PL:DP:DV 0,38,255,48,255,255:17:1 0,15,155,15,155,155:5:0 0,21,170,21,170,170:7:0 -17 567 . C <*> . . END=573;MinDP=6;QS=3,0 PL:DP 0,48,255:16 0,18,156:6 0,18,175:6 +17 567 . C <*> . . END=573;MIN_DP=6;QS=3,0 PL:DP 0,48,255:16 0,18,156:6 0,18,175:6 17 574 . C A,<*> 0 . DP=31;I16=18,11,0,1,1088,41328,15,225,1740,104400,29,841,478,10422,25,625;QS=2.94071,0.0592885,0;SGB=-0.556633;RPBZ=-0.173514;MQBZ=-5.38516;MQSBZ=-1.22474;BQBZ=-1.68578;SCBZ=3.60588;MQ0F=0 PL:DP:DV 0,48,255,48,255,255:16:0 0,9,170,21,173,177:8:1 0,18,173,18,173,173:6:0 17 575 . T C,<*> 0 . DP=30;I16=17,11,0,1,1048,41548,9,81,1680,100800,29,841,480,10426,25,625;QS=2.96786,0.0321429,0;SGB=-0.556633;RPBZ=-0.119685;MQBZ=-5.2915;MQSBZ=-1.19024;BQBZ=-1.68121;SCBZ=3.53589;MQ0F=0 PL:DP:DV 0,48,255,48,255,255:16:0 0,13,200,21,203,202:8:1 0,15,163,15,163,163:5:0 -17 576 . G <*> . . END=579;MinDP=5;QS=3,0 PL:DP 0,48,255:16 0,24,198:8 0,15,144:5 +17 576 . G <*> . . END=579;MIN_DP=5;QS=3,0 PL:DP 0,48,255:16 0,24,198:8 0,15,144:5 17 580 . A C,<*> 0 . 
DP=30;I16=15,14,1,0,1060,39178,16,256,1709,101641,60,3600,510,11078,17,289;QS=2.97338,0.0266223,0;SGB=-0.556633;RPBZ=1.32953;MQBZ=0.185695;MQSBZ=-1.06904;BQBZ=-1.69093;SCBZ=-0.267102;MQ0F=0 PL:DP:DV 0,34,255,48,255,255:17:1 0,24,221,24,221,221:8:0 0,15,155,15,155,155:5:0 -17 581 . A <*> . . MinDP=5;QS=3,0 PL:DP 0,54,255:18 0,24,223:8 0,15,153:5 +17 581 . A <*> . . MIN_DP=5;QS=3,0 PL:DP 0,54,255:18 0,24,223:8 0,15,153:5 17 582 . C G,<*> 0 . DP=31;I16=15,15,1,0,1080,39870,8,64,1769,105241,60,3600,519,11211,15,225;QS=2.98734,0.0126582,0;SGB=-0.556633;RPBZ=1.34259;MQBZ=0.182574;MQSBZ=-1.0328;BQBZ=-1.68266;SCBZ=-0.262467;MQ0F=0 PL:DP:DV 0,41,255,51,255,255:18:1 0,24,207,24,207,207:8:0 0,15,151,15,151,151:5:0 -17 583 . T <*> . . END=592;MinDP=5;QS=3,0 PL:DP 0,51,255:17 0,21,209:7 0,15,155:5 +17 583 . T <*> . . END=592;MIN_DP=5;QS=3,0 PL:DP 0,51,255:17 0,21,209:7 0,15,155:5 17 593 . C A,<*> 0 . DP=31;I16=16,14,0,1,1071,39925,7,49,1769,105241,29,841,561,12253,25,625;QS=2.97021,0.0297872,0;SGB=-0.556633;RPBZ=0.559355;MQBZ=-3.80789;MQSBZ=-0.0464238;BQBZ=-1.57464;SCBZ=3.67454;MQ0F=0 PL:DP:DV 0,54,255,54,255,255:18:0 0,12,184,18,187,185:7:1 0,18,174,18,174,174:6:0 17 594 . A G,<*> 0 . DP=33;I16=16,16,1,0,1161,42757,4,16,1858,109682,60,3600,572,12672,15,225;QS=2.99359,0.00641026,0;SGB=-0.556633;RPBZ=-0.998367;MQBZ=0.254;MQSBZ=-0.0435607;BQBZ=-1.68954;SCBZ=-0.253876;MQ0F=0 PL:DP:DV 0,42,255,51,255,255:18:1 0,21,205,21,205,205:7:0 0,24,213,24,213,213:8:0 -17 595 . A <*> . . MinDP=7;QS=3,0 PL:DP 0,54,255:18 0,21,198:7 0,24,218:8 +17 595 . A <*> . . MIN_DP=7;QS=3,0 PL:DP 0,54,255:18 0,21,198:7 0,24,218:8 17 596 . A G,<*> 0 . DP=33;I16=16,16,1,0,1061,37187,8,64,1858,109682,60,3600,590,12952,1,1;QS=2.98564,0.0143627,0;SGB=-0.556633;RPBZ=1.68146;MQBZ=0.254;MQSBZ=-0.0435607;BQBZ=-1.68357;SCBZ=-0.253876;MQ0F=0 PL:DP:DV 0,41,255,51,255,255:18:1 0,21,169,21,169,169:7:0 0,24,231,24,231,231:8:0 -17 597 . C <*> . . END=600;MinDP=7;QS=3,0 PL:DP 0,51,255:17 0,21,194:7 0,24,220:8 +17 597 . 
C <*> . . END=600;MIN_DP=7;QS=3,0 PL:DP 0,51,255:17 0,21,194:7 0,24,220:8 diff --git a/test/norm.symbolic.1.out b/test/norm.symbolic.1.out index cec427798..dfc84a427 100644 --- a/test/norm.symbolic.1.out +++ b/test/norm.symbolic.1.out @@ -6,6 +6,6 @@ ##INFO= #CHROM POS ID REF ALT QUAL FILTER INFO 20 15 . TAC T . . ORI=20|24|ACA|A -20 15 . TAC . . END=17;SVTYPE=DEL;ORI=20|24|A| +20 15 . T . . END=17;SVTYPE=DEL;ORI=20|24|A| 20 93 . CAAA C . . ORI=20|98|AAAA|A -20 93 . CAAA . . END=96;SVTYPE=DEL;ORI=20|98|A| +20 93 . C . . END=96;SVTYPE=DEL;ORI=20|98|A| diff --git a/test/norm.symbolic.2.out b/test/norm.symbolic.2.out new file mode 100644 index 000000000..5e753f89f --- /dev/null +++ b/test/norm.symbolic.2.out @@ -0,0 +1,10 @@ +##fileformat=VCFv4.2 +##FILTER= +##contig= +##INFO= +##INFO= +##INFO= +#CHROM POS ID REF ALT QUAL FILTER INFO +20 15 . TAC T, . . END=17;SVTYPE=DEL;ORI=20|24|ACA|A, +20 93 . CAAA C . . ORI=20|98|AAAA|A +20 93 . C . . END=96;SVTYPE=DEL;ORI=20|98|A| diff --git a/test/norm.symbolic.2.vcf b/test/norm.symbolic.2.vcf new file mode 100644 index 000000000..8a2dc5e4f --- /dev/null +++ b/test/norm.symbolic.2.vcf @@ -0,0 +1,8 @@ +##fileformat=VCFv4.2 +##contig= +##INFO= +##INFO= +#CHROM POS ID REF ALT QUAL FILTER INFO +20 24 . ACA A, . . END=26;SVTYPE=DEL +20 98 . AAAA A . . . +20 98 . A . . 
END=101;SVTYPE=DEL diff --git a/test/query.96.out b/test/query.96.out index 585c684bc..51630d1b3 100644 --- a/test/query.96.out +++ b/test/query.96.out @@ -1,2 +1,2 @@ -#[1]CHROM [2]POS [3]SAMPLE [4]DP [5]GT[6]CHROM [7]POS [8]SAMPLE [9]DP [10]GT -4 3258449 C 1 1/14 3258449 D 0 0/0 +#[1]C:CHROM [2]C:POS [3]C:SAMPLE [4]C:DP [5]C:GT [6]D:CHROM [7]D:POS [8]D:SAMPLE [9]D:DP [10]D:GT +4 3258449 C 1 1/1 4 3258449 D 0 0/0 diff --git a/test/query.97.out b/test/query.97.out index c409c224f..eb37c2d82 100644 --- a/test/query.97.out +++ b/test/query.97.out @@ -1,2 +1,2 @@ -#[1]CHROM [2]POS [3]SAMPLE [4]DP [5]GT [6]SAMPLE [7]DP [8]GT +#[1]CHROM [2]POS [3]C:SAMPLE [4]C:DP [5]C:GT [6]D:SAMPLE [7]D:DP [8]D:GT 4 3258449 C 1 1/1 D 0 0/0 diff --git a/test/query.98.out b/test/query.98.out index b53f5968c..5fe244810 100644 --- a/test/query.98.out +++ b/test/query.98.out @@ -1,2 +1,2 @@ -#[1]CHROM [2]POS [3]SAMPLE [4]SAMPLE [5]DP [6]DP [7]GT [8]GT +#[1]CHROM [2]POS [3]C:SAMPLE [4]D:SAMPLE [5]C:DP [6]D:DP [7]C:GT [8]D:GT 4 3258449 C D 1 0 1/1 0/0 diff --git a/test/query.99.out b/test/query.99.out new file mode 100644 index 000000000..1b0e1e70e --- /dev/null +++ b/test/query.99.out @@ -0,0 +1 @@ +11 2343543 N . . 
diff --git a/test/query.string.2.1.out b/test/query.string.2.1.out new file mode 100644 index 000000000..b7ac07862 --- /dev/null +++ b/test/query.string.2.1.out @@ -0,0 +1,2 @@ +1 865568 AA,BB +1 865628 BB,CC diff --git a/test/query.string.2.1.txt b/test/query.string.2.1.txt new file mode 100644 index 000000000..474c46639 --- /dev/null +++ b/test/query.string.2.1.txt @@ -0,0 +1 @@ +BB diff --git a/test/query.string.2.2.out b/test/query.string.2.2.out new file mode 100644 index 000000000..8add69512 --- /dev/null +++ b/test/query.string.2.2.out @@ -0,0 +1 @@ +1 865568 P,Q Q,R diff --git a/test/query.string.2.2.txt b/test/query.string.2.2.txt new file mode 100644 index 000000000..73c52c3e3 --- /dev/null +++ b/test/query.string.2.2.txt @@ -0,0 +1 @@ +Q diff --git a/test/query.string.2.vcf b/test/query.string.2.vcf new file mode 100644 index 000000000..60bb48ba3 --- /dev/null +++ b/test/query.string.2.vcf @@ -0,0 +1,9 @@ +##fileformat=VCFv4.3 +##reference=GRCh37 +##contig= +##INFO= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT S1 S2 +1 865568 . G A . . STR=AA,BB STR P,Q Q,R +1 865628 . G A . . STR=BB,CC STR R,S S,T +1 865716 . G A . . STR=CC,DD STR T,U U,V diff --git a/test/split-vep.18.out b/test/split-vep.18.out index 3c45bf03b..8ed65c1e9 100644 --- a/test/split-vep.18.out +++ b/test/split-vep.18.out @@ -1,4 +1,4 @@ -1 979496 . T C . . 
CSQ=C|missense_variant|MODERATE|PERM1|ENSG00000187642|Transcript|ENST00000341290|protein_coding|3/5||ENST00000341290.6:c.1192A>G|ENSP00000343864.2:p.Ser398Gly|1228|1192|398|S/G|Agc/Ggc|rs13302983||-1||SNV|HGNC|HGNC:28208|||2|A2||ENSP00000343864|Q5SV97||||tolerated(0.2)|benign(0.003)|PANTHER:PTHR47282&MobiDB_lite:mobidb-lite||||0.9418|0.9539|0.999|0.9592|0.9847|||0.9721|0.9533|0.9754|0.9166|0.9999|0.9923|0.9677|0.9536|0.9811|0.9999|gnomAD_EAS|||||||||||||||||||-27|-2|23|25|0.00|0.00|0.00|0.00|PERM1,C|downstream_gene_variant|MODIFIER|PLEKHN1|ENSG00000187583|Transcript|ENST00000379407|protein_coding||||||||||rs13302983|4488|1||SNV|HGNC|HGNC:25284|||1|A2|CCDS53256.1|ENSP00000368717|Q494U1||||||||||0.9418|0.9539|0.999|0.9592|0.9847|||0.9721|0.9533|0.9754|0.9166|0.9999|0.9923|0.9677|0.9536|0.9811|0.9999|gnomAD_EAS|||||||||||||||||||-27|-2|23|25|0.00|0.00|0.00|0.00|PERM1,C|downstream_gene_variant|MODIFIER|PLEKHN1|ENSG00000187583|Transcript|ENST00000379409|protein_coding||||||||||rs13302983|4488|1||SNV|HGNC|HGNC:25284|||2|A2||ENSP00000368719|Q494U1||||||||||0.9418|0.9539|0.999|0.9592|0.9847|||0.9721|0.9533|0.9754|0.9166|0.9999|0.9923|0.9677|0.9536|0.9811|0.9999|gnomAD_EAS|||||||||||||||||||-27|-2|23|25|0.00|0.00|0.00|0.00|PERM1,C|downstream_gene_variant|MODIFIER|PLEKHN1|ENSG00000187583|Transcript|ENST00000379410|protein_coding||||||||||rs13302983|3631|1||SNV|HGNC|HGNC:25284|YES|NM_032129.3|1|P3|CCDS4.1|ENSP00000368720|Q494U1||UPI00001416D8||||||||0.9418|0.9539|0.999|0.9592|0.9847|||0.9721|0.9533|0.9754|0.9166|0.9999|0.9923|0.9677|0.9536|0.9811|0.9999|gnomAD_EAS|||||||||||||||||||-27|-2|23|25|0.00|0.00|0.00|0.00|PERM1,C|missense_variant|MODERATE|PERM1|ENSG00000187642|Transcript|ENST00000433179|protein_coding|1/3||ENST00000433179.3:c.1534A>G|ENSP00000414022.3:p.Ser512Gly|1534|1534|512|S/G|Agc/Ggc|rs13302983||-1||SNV|HGNC|HGNC:28208|YES||5|P2|CCDS76083.1|ENSP00000414022|Q5SV97||||tolerated(0.18)|benign(0.011)|PANTHER:PTHR47282&MobiDB_lite:mobidb-lite||||0.9418|0.9539|0.999|0.9
592|0.9847|||0.9721|0.9533|0.9754|0.9166|0.9999|0.9923|0.9677|0.9536|0.9811|0.9999|gnomAD_EAS|||||||||||||||||||-27|-2|23|25|0.00|0.00|0.00|0.00|PERM1,C|upstream_gene_variant|MODIFIER|PERM1|ENSG00000187642|Transcript|ENST00000479361|retained_intron||||||||||rs13302983|2855|-1||SNV|HGNC|HGNC:28208|||1||||||||||||||0.9418|0.9539|0.999|0.9592|0.9847|||0.9721|0.9533|0.9754|0.9166|0.9999|0.9923|0.9677|0.9536|0.9811|0.9999|gnomAD_EAS|||||||||||||||||||-27|-2|23|25|0.00|0.00|0.00|0.00|PERM1,C|downstream_gene_variant|MODIFIER|PLEKHN1|ENSG00000187583|Transcript|ENST00000491024|protein_coding||||||||||rs13302983|3631|1|cds_start_NF|SNV|HGNC|HGNC:25284|||3|||ENSP00000462558||J3KSM5|UPI000268AE1F||||||||0.9418|0.9539|0.999|0.9592|0.9847|||0.9721|0.9533|0.9754|0.9166|0.9999|0.9923|0.9677|0.9536|0.9811|0.9999|gnomAD_EAS|||||||||||||||||||-27|-2|23|25|0.00|0.00|0.00|0.00|PERM1;Allele=C,C,C,C,C,C,C;Consequence=missense_variant,downstream_gene_variant,downstream_gene_variant,downstream_gene_variant,missense_variant,upstream_gene_variant,downstream_gene_variant;IMPACT=MODERATE,MODIFIER,MODIFIER,MODIFIER,MODERATE,MODIFIER,MODIFIER;SYMBOL=PERM1,PLEKHN1,PLEKHN1,PLEKHN1,PERM1,PERM1,PLEKHN1;Gene=ENSG00000187642,ENSG00000187583,ENSG00000187583,ENSG00000187583,ENSG00000187642,ENSG00000187642,ENSG00000187583;Feature_type=Transcript,Transcript,Transcript,Transcript,Transcript,Transcript,Transcript;Feature=ENST00000341290,ENST00000379407,ENST00000379409,ENST00000379410,ENST00000433179,ENST00000479361,ENST00000491024;BIOTYPE=protein_coding,protein_coding,protein_coding,protein_coding,protein_coding,retained_intron,protein_coding;EXON=3/5,.,.,.,1/3,.,.;INTRON=.,.,.,.,.,.,.;HGVSc=ENST00000341290.6:c.1192A>G,.,.,.,ENST00000433179.3:c.1534A>G,.,.;HGVSp=ENSP00000343864.2:p.Ser398Gly,.,.,.,ENSP00000414022.3:p.Ser512Gly,.,.;cDNA_position=1228,.,.,.,1534,.,.;CDS_position=1192,.,.,.,1534,.,.;Protein_position=398,.,.,.,512,.,.;Amino_acids=S/G,.,.,.,S/G,.,.;Codons=Agc/Ggc,.,.,.,Agc/Ggc,.,.;Existing_variat
ion=rs13302983,rs13302983,rs13302983,rs13302983,rs13302983,rs13302983,rs13302983;DISTANCE=.,4488,4488,3631,.,2855,3631;STRAND=-1,1,1,1,-1,-1,1;FLAGS=.,.,.,.,.,.,cds_start_NF;VARIANT_CLASS=SNV,SNV,SNV,SNV,SNV,SNV,SNV;SYMBOL_SOURCE=HGNC,HGNC,HGNC,HGNC,HGNC,HGNC,HGNC;HGNC_ID=HGNC:28208,HGNC:25284,HGNC:25284,HGNC:25284,HGNC:28208,HGNC:28208,HGNC:25284;CANONICAL=.,.,.,YES,YES,.,.;MANE=.,.,.,NM_032129.3,.,.,.;TSL=2,1,2,1,5,1,3;APPRIS=A2,A2,A2,P3,P2,.,.;CCDS=.,CCDS53256.1,.,CCDS4.1,CCDS76083.1,.,.;ENSP=ENSP00000343864,ENSP00000368717,ENSP00000368719,ENSP00000368720,ENSP00000414022,.,ENSP00000462558;SWISSPROT=Q5SV97,Q494U1,Q494U1,Q494U1,Q5SV97,.,.;TREMBL=.,.,.,.,.,.,J3KSM5;UNIPARC=.,.,.,UPI00001416D8,.,.,UPI000268AE1F;GENE_PHENO=.,.,.,.,.,.,.;SIFT=tolerated(0.2),.,.,.,tolerated(0.18),.,.;PolyPhen=benign(0.003),.,.,.,benign(0.011),.,.;DOMAINS=PANTHER:PTHR47282&MobiDB_lite:mobidb-lite,.,.,.,PANTHER:PTHR47282&MobiDB_lite:mobidb-lite,.,.;miRNA=.,.,.,.,.,.,.;HGVS_OFFSET=.,.,.,.,.,.,.;AF=.,.,.,.,.,.,.;AFR_AF=0.9418,0.9418,0.9418,0.9418,0.9418,0.9418,0.9418;AMR_AF=0.9539,0.9539,0.9539,0.9539,0.9539,0.9539,0.9539;EAS_AF=0.999,0.999,0.999,0.999,0.999,0.999,0.999;EUR_AF=0.9592,0.9592,0.9592,0.9592,0.9592,0.9592,0.9592;SAS_AF=0.9847,0.9847,0.9847,0.9847,0.9847,0.9847,0.9847;AA_AF=.,.,.,.,.,.,.;EA_AF=.,.,.,.,.,.,.;gnomAD_AF=0.9721,0.9721,0.9721,0.9721,0.9721,0.9721,0.9721;gnomAD_AFR_AF=0.9533,0.9533,0.9533,0.9533,0.9533,0.9533,0.9533;gnomAD_AMR_AF=0.9754,0.9754,0.9754,0.9754,0.9754,0.9754,0.9754;gnomAD_ASJ_AF=0.9166,0.9166,0.9166,0.9166,0.9166,0.9166,0.9166;gnomAD_EAS_AF=0.9999,0.9999,0.9999,0.9999,0.9999,0.9999,0.9999;gnomAD_FIN_AF=0.9923,0.9923,0.9923,0.9923,0.9923,0.9923,0.9923;gnomAD_NFE_AF=0.9677,0.9677,0.9677,0.9677,0.9677,0.9677,0.9677;gnomAD_OTH_AF=0.9536,0.9536,0.9536,0.9536,0.9536,0.9536,0.9536;gnomAD_SAS_AF=0.9811,0.9811,0.9811,0.9811,0.9811,0.9811,0.9811;MAX_AF=0.9999,0.9999,0.9999,0.9999,0.9999,0.9999,0.9999;MAX_AF_POPS=gnomAD_EAS,gnomAD_EAS,gnomAD_EAS,gnomAD_EAS,gnomAD_EA
S,gnomAD_EAS,gnomAD_EAS;CLIN_SIG=.,.,.,.,.,.,.;SOMATIC=.,.,.,.,.,.,.;PHENO=.,.,.,.,.,.,.;PUBMED=.,.,.,.,.,.,.;MOTIF_NAME=.,.,.,.,.,.,.;MOTIF_POS=.,.,.,.,.,.,.;HIGH_INF_POS=.,.,.,.,.,.,.;MOTIF_SCORE_CHANGE=.,.,.,.,.,.,.;existing_InFrame_oORFs=.,.,.,.,.,.,.;existing_OutOfFrame_oORFs=.,.,.,.,.,.,.;existing_uORFs=.,.,.,.,.,.,.;five_prime_UTR_variant_annotation=.,.,.,.,.,.,.;five_prime_UTR_variant_consequence=.,.,.,.,.,.,.;SpliceRegion=.,.,.,.,.,.,.;LoF=.,.,.,.,.,.,.;LoF_filter=.,.,.,.,.,.,.;LoF_flags=.,.,.,.,.,.,.;LoF_info=.,.,.,.,.,.,.;SpliceAI_pred_DP_AG=-27,-27,-27,-27,-27,-27,-27;SpliceAI_pred_DP_AL=-2,-2,-2,-2,-2,-2,-2;SpliceAI_pred_DP_DG=23,23,23,23,23,23,23;SpliceAI_pred_DP_DL=25,25,25,25,25,25,25;SpliceAI_pred_DS_AG=0.00,0.00,0.00,0.00,0.00,0.00,0.00;SpliceAI_pred_DS_AL=0.00,0.00,0.00,0.00,0.00,0.00,0.00;SpliceAI_pred_DS_DG=0.00,0.00,0.00,0.00,0.00,0.00,0.00;SpliceAI_pred_DS_DL=0.00,0.00,0.00,0.00,0.00,0.00,0.00;SpliceAI_pred_SYMBOL=PERM1,PERM1,PERM1,PERM1,PERM1,PERM1,PERM1 -1 979560 . T C . . 
CSQ=C|synonymous_variant|LOW|PERM1|ENSG00000187642|Transcript|ENST00000341290|protein_coding|3/5||ENST00000341290.6:c.1128A>G|ENSP00000343864.2:p.Ala376%3D|1164|1128|376|A|gcA/gcG|rs13303033||-1||SNV|HGNC|HGNC:28208|||2|A2||ENSP00000343864|Q5SV97||||||PANTHER:PTHR47282&Low_complexity_(Seg):seg|||0.5096|0.177|0.6095|0.7123|0.5895|0.5971|||0.5887|0.2428|0.6604|0.5617|0.7237|0.5334|0.5801|0.5647|0.6213|0.7237|gnomAD_EAS|||||||||||||||||||28|-30|-39|3|0.00|0.00|0.00|0.00|PERM1,C|downstream_gene_variant|MODIFIER|PLEKHN1|ENSG00000187583|Transcript|ENST00000379407|protein_coding||||||||||rs13303033|4552|1||SNV|HGNC|HGNC:25284|||1|A2|CCDS53256.1|ENSP00000368717|Q494U1|||||||||0.5096|0.177|0.6095|0.7123|0.5895|0.5971|||0.5887|0.2428|0.6604|0.5617|0.7237|0.5334|0.5801|0.5647|0.6213|0.7237|gnomAD_EAS|||||||||||||||||||28|-30|-39|3|0.00|0.00|0.00|0.00|PERM1,C|downstream_gene_variant|MODIFIER|PLEKHN1|ENSG00000187583|Transcript|ENST00000379409|protein_coding||||||||||rs13303033|4552|1||SNV|HGNC|HGNC:25284|||2|A2||ENSP00000368719|Q494U1|||||||||0.5096|0.177|0.6095|0.7123|0.5895|0.5971|||0.5887|0.2428|0.6604|0.5617|0.7237|0.5334|0.5801|0.5647|0.6213|0.7237|gnomAD_EAS|||||||||||||||||||28|-30|-39|3|0.00|0.00|0.00|0.00|PERM1,C|downstream_gene_variant|MODIFIER|PLEKHN1|ENSG00000187583|Transcript|ENST00000379410|protein_coding||||||||||rs13303033|3695|1||SNV|HGNC|HGNC:25284|YES|NM_032129.3|1|P3|CCDS4.1|ENSP00000368720|Q494U1||UPI00001416D8|||||||0.5096|0.177|0.6095|0.7123|0.5895|0.5971|||0.5887|0.2428|0.6604|0.5617|0.7237|0.5334|0.5801|0.5647|0.6213|0.7237|gnomAD_EAS|||||||||||||||||||28|-30|-39|3|0.00|0.00|0.00|0.00|PERM1,C|synonymous_variant|LOW|PERM1|ENSG00000187642|Transcript|ENST00000433179|protein_coding|1/3||ENST00000433179.3:c.1470A>G|ENSP00000414022.3:p.Ala490%3D|1470|1470|490|A|gcA/gcG|rs13303033||-1||SNV|HGNC|HGNC:28208|YES||5|P2|CCDS76083.1|ENSP00000414022|Q5SV97||||||PANTHER:PTHR47282&Low_complexity_(Seg):seg|||0.5096|0.177|0.6095|0.7123|0.5895|0.5971|||0.5887|0.2428|0.6604
|0.5617|0.7237|0.5334|0.5801|0.5647|0.6213|0.7237|gnomAD_EAS|||||||||||||||||||28|-30|-39|3|0.00|0.00|0.00|0.00|PERM1,C|upstream_gene_variant|MODIFIER|PERM1|ENSG00000187642|Transcript|ENST00000479361|retained_intron||||||||||rs13303033|2919|-1||SNV|HGNC|HGNC:28208|||1|||||||||||||0.5096|0.177|0.6095|0.7123|0.5895|0.5971|||0.5887|0.2428|0.6604|0.5617|0.7237|0.5334|0.5801|0.5647|0.6213|0.7237|gnomAD_EAS|||||||||||||||||||28|-30|-39|3|0.00|0.00|0.00|0.00|PERM1,C|downstream_gene_variant|MODIFIER|PLEKHN1|ENSG00000187583|Transcript|ENST00000491024|protein_coding||||||||||rs13303033|3695|1|cds_start_NF|SNV|HGNC|HGNC:25284|||3|||ENSP00000462558||J3KSM5|UPI000268AE1F|||||||0.5096|0.177|0.6095|0.7123|0.5895|0.5971|||0.5887|0.2428|0.6604|0.5617|0.7237|0.5334|0.5801|0.5647|0.6213|0.7237|gnomAD_EAS|||||||||||||||||||28|-30|-39|3|0.00|0.00|0.00|0.00|PERM1;Allele=C,C,C,C,C,C,C;Consequence=synonymous_variant,downstream_gene_variant,downstream_gene_variant,downstream_gene_variant,synonymous_variant,upstream_gene_variant,downstream_gene_variant;IMPACT=LOW,MODIFIER,MODIFIER,MODIFIER,LOW,MODIFIER,MODIFIER;SYMBOL=PERM1,PLEKHN1,PLEKHN1,PLEKHN1,PERM1,PERM1,PLEKHN1;Gene=ENSG00000187642,ENSG00000187583,ENSG00000187583,ENSG00000187583,ENSG00000187642,ENSG00000187642,ENSG00000187583;Feature_type=Transcript,Transcript,Transcript,Transcript,Transcript,Transcript,Transcript;Feature=ENST00000341290,ENST00000379407,ENST00000379409,ENST00000379410,ENST00000433179,ENST00000479361,ENST00000491024;BIOTYPE=protein_coding,protein_coding,protein_coding,protein_coding,protein_coding,retained_intron,protein_coding;EXON=3/5,.,.,.,1/3,.,.;INTRON=.,.,.,.,.,.,.;HGVSc=ENST00000341290.6:c.1128A>G,.,.,.,ENST00000433179.3:c.1470A>G,.,.;HGVSp=ENSP00000343864.2:p.Ala376%3D,.,.,.,ENSP00000414022.3:p.Ala490%3D,.,.;cDNA_position=1164,.,.,.,1470,.,.;CDS_position=1128,.,.,.,1470,.,.;Protein_position=376,.,.,.,490,.,.;Amino_acids=A,.,.,.,A,.,.;Codons=gcA/gcG,.,.,.,gcA/gcG,.,.;Existing_variation=rs13303033,rs13303033,rs133
03033,rs13303033,rs13303033,rs13303033,rs13303033;DISTANCE=.,4552,4552,3695,.,2919,3695;STRAND=-1,1,1,1,-1,-1,1;FLAGS=.,.,.,.,.,.,cds_start_NF;VARIANT_CLASS=SNV,SNV,SNV,SNV,SNV,SNV,SNV;SYMBOL_SOURCE=HGNC,HGNC,HGNC,HGNC,HGNC,HGNC,HGNC;HGNC_ID=HGNC:28208,HGNC:25284,HGNC:25284,HGNC:25284,HGNC:28208,HGNC:28208,HGNC:25284;CANONICAL=.,.,.,YES,YES,.,.;MANE=.,.,.,NM_032129.3,.,.,.;TSL=2,1,2,1,5,1,3;APPRIS=A2,A2,A2,P3,P2,.,.;CCDS=.,CCDS53256.1,.,CCDS4.1,CCDS76083.1,.,.;ENSP=ENSP00000343864,ENSP00000368717,ENSP00000368719,ENSP00000368720,ENSP00000414022,.,ENSP00000462558;SWISSPROT=Q5SV97,Q494U1,Q494U1,Q494U1,Q5SV97,.,.;TREMBL=.,.,.,.,.,.,J3KSM5;UNIPARC=.,.,.,UPI00001416D8,.,.,UPI000268AE1F;GENE_PHENO=.,.,.,.,.,.,.;SIFT=.,.,.,.,.,.,.;PolyPhen=.,.,.,.,.,.,.;DOMAINS=PANTHER:PTHR47282&Low_complexity_(Seg):seg,.,.,.,PANTHER:PTHR47282&Low_complexity_(Seg):seg,.,.;miRNA=.,.,.,.,.,.,.;HGVS_OFFSET=.,.,.,.,.,.,.;AF=0.5096,0.5096,0.5096,0.5096,0.5096,0.5096,0.5096;AFR_AF=0.177,0.177,0.177,0.177,0.177,0.177,0.177;AMR_AF=0.6095,0.6095,0.6095,0.6095,0.6095,0.6095,0.6095;EAS_AF=0.7123,0.7123,0.7123,0.7123,0.7123,0.7123,0.7123;EUR_AF=0.5895,0.5895,0.5895,0.5895,0.5895,0.5895,0.5895;SAS_AF=0.5971,0.5971,0.5971,0.5971,0.5971,0.5971,0.5971;AA_AF=.,.,.,.,.,.,.;EA_AF=.,.,.,.,.,.,.;gnomAD_AF=0.5887,0.5887,0.5887,0.5887,0.5887,0.5887,0.5887;gnomAD_AFR_AF=0.2428,0.2428,0.2428,0.2428,0.2428,0.2428,0.2428;gnomAD_AMR_AF=0.6604,0.6604,0.6604,0.6604,0.6604,0.6604,0.6604;gnomAD_ASJ_AF=0.5617,0.5617,0.5617,0.5617,0.5617,0.5617,0.5617;gnomAD_EAS_AF=0.7237,0.7237,0.7237,0.7237,0.7237,0.7237,0.7237;gnomAD_FIN_AF=0.5334,0.5334,0.5334,0.5334,0.5334,0.5334,0.5334;gnomAD_NFE_AF=0.5801,0.5801,0.5801,0.5801,0.5801,0.5801,0.5801;gnomAD_OTH_AF=0.5647,0.5647,0.5647,0.5647,0.5647,0.5647,0.5647;gnomAD_SAS_AF=0.6213,0.6213,0.6213,0.6213,0.6213,0.6213,0.6213;MAX_AF=0.7237,0.7237,0.7237,0.7237,0.7237,0.7237,0.7237;MAX_AF_POPS=gnomAD_EAS,gnomAD_EAS,gnomAD_EAS,gnomAD_EAS,gnomAD_EAS,gnomAD_EAS,gnomAD_EAS;CLIN_SIG=.,.,.,.,.,.,
.;SOMATIC=.,.,.,.,.,.,.;PHENO=.,.,.,.,.,.,.;PUBMED=.,.,.,.,.,.,.;MOTIF_NAME=.,.,.,.,.,.,.;MOTIF_POS=.,.,.,.,.,.,.;HIGH_INF_POS=.,.,.,.,.,.,.;MOTIF_SCORE_CHANGE=.,.,.,.,.,.,.;existing_InFrame_oORFs=.,.,.,.,.,.,.;existing_OutOfFrame_oORFs=.,.,.,.,.,.,.;existing_uORFs=.,.,.,.,.,.,.;five_prime_UTR_variant_annotation=.,.,.,.,.,.,.;five_prime_UTR_variant_consequence=.,.,.,.,.,.,.;SpliceRegion=.,.,.,.,.,.,.;LoF=.,.,.,.,.,.,.;LoF_filter=.,.,.,.,.,.,.;LoF_flags=.,.,.,.,.,.,.;LoF_info=.,.,.,.,.,.,.;SpliceAI_pred_DP_AG=28,28,28,28,28,28,28;SpliceAI_pred_DP_AL=-30,-30,-30,-30,-30,-30,-30;SpliceAI_pred_DP_DG=-39,-39,-39,-39,-39,-39,-39;SpliceAI_pred_DP_DL=3,3,3,3,3,3,3;SpliceAI_pred_DS_AG=0.00,0.00,0.00,0.00,0.00,0.00,0.00;SpliceAI_pred_DS_AL=0.00,0.00,0.00,0.00,0.00,0.00,0.00;SpliceAI_pred_DS_DG=0.00,0.00,0.00,0.00,0.00,0.00,0.00;SpliceAI_pred_DS_DL=0.00,0.00,0.00,0.00,0.00,0.00,0.00;SpliceAI_pred_SYMBOL=PERM1,PERM1,PERM1,PERM1,PERM1,PERM1,PERM1 -1 999842 . C A . . CSQ=A|intron_variant|MODIFIER|HES4|ENSG00000188290|Transcript|ENST00000304952|protein_coding||1/3|ENST00000304952.11:c.108+24G>T|||||||rs2298214||-1||SNV|HGNC|HGNC:24149||NM_021170.4|1|P1|CCDS5.1|ENSP00000304595|Q9HCC6|||||||||0.4938|0.0356|0.6268|0.7718|0.5895|0.6339|0.1386|0.5695|0.6136|0.1208|0.7139|0.6344|0.786|0.5555|0.595|0.5882|0.6625|0.786|gnomAD_EAS|||||||||||||||||||6|-37|6|24|0.00|0.00|0.01|0.02|HES4,A|missense_variant|MODERATE|HES4|ENSG00000188290|Transcript|ENST00000428771|protein_coding|1/3||ENST00000428771.6:c.132G>T|ENSP00000393198.2:p.Arg44Ser|331|132|44|R/S|agG/agT|rs2298214||-1||SNV|HGNC|HGNC:24149|YES||2||CCDS44034.1|ENSP00000393198||E9PB28|UPI0001881B51||tolerated_low_confidence(0.72)|benign(0)|MobiDB_lite:mobidb-lite&Low_complexity_(Seg):seg|||0.4938|0.0356|0.6268|0.7718|0.5895|0.6339|0.1386|0.5695|0.6136|0.1208|0.7139|0.6344|0.786|0.5555|0.595|0.5882|0.6625|0.786|gnomAD_EAS||||||||||||||||||MUTANT_DONOR_MES:11.0784944978449&EXON_END:1000172&INTRON_END:999691&DE_NOVO_DONOR_MES_POS:-156&DE_NOVO_D
ONOR_POS:-156&EXON_START:999692&DE_NOVO_DONOR_MES:7.0006176073036&DE_NOVO_DONOR_PROB:0.167156964545091&INTRON_START:999614|6|-37|6|24|0.00|0.00|0.01|0.02|HES4,A|non_coding_transcript_exon_variant|MODIFIER|HES4|ENSG00000188290|Transcript|ENST00000481869|retained_intron|1/2||ENST00000481869.1:n.255G>T||255|||||rs2298214||-1||SNV|HGNC|HGNC:24149|||2|||||||||||||0.4938|0.0356|0.6268|0.7718|0.5895|0.6339|0.1386|0.5695|0.6136|0.1208|0.7139|0.6344|0.786|0.5555|0.595|0.5882|0.6625|0.786|gnomAD_EAS|||||||||||||||||||6|-37|6|24|0.00|0.00|0.01|0.02|HES4,A|intron_variant|MODIFIER|HES4|ENSG00000188290|Transcript|ENST00000484667|protein_coding||1/2|ENST00000484667.2:c.108+24G>T|||||||rs2298214||-1||SNV|HGNC|HGNC:24149|||3|||ENSP00000425085||D6REB3|UPI0001D3BBEE|||||||0.4938|0.0356|0.6268|0.7718|0.5895|0.6339|0.1386|0.5695|0.6136|0.1208|0.7139|0.6344|0.786|0.5555|0.595|0.5882|0.6625|0.786|gnomAD_EAS|||||||||||||||||||6|-37|6|24|0.00|0.00|0.01|0.02|HES4,A|upstream_gene_variant|MODIFIER|AL645608.7|ENSG00000272512|Transcript|ENST00000606034|lncRNA||||||||||rs2298214|1791|-1||SNV|Clone_based_ensembl_gene||YES|||||||||||||||0.4938|0.0356|0.6268|0.7718|0.5895|0.6339|0.1386|0.5695|0.6136|0.1208|0.7139|0.6344|0.786|0.5555|0.595|0.5882|0.6625|0.786|gnomAD_EAS|||||||||||||||||||6|-37|6|24|0.00|0.00|0.01|0.02|HES4,A|upstream_gene_variant|MODIFIER|ISG15|ENSG00000187608|Transcript|ENST00000624652|protein_coding||||||||||rs2298214|1303|1|cds_end_NF|SNV|HGNC|HGNC:4053|||3|||ENSP00000485313||A0A096LNZ9||1||||||0.4938|0.0356|0.6268|0.7718|0.5895|0.6339|0.1386|0.5695|0.6136|0.1208|0.7139|0.6344|0.786|0.5555|0.595|0.5882|0.6625|0.786|gnomAD_EAS|||||||||||||||||||6|-37|6|24|0.00|0.00|0.01|0.02|HES4,A|upstream_gene_variant|MODIFIER|ISG15|ENSG00000187608|Transcript|ENST00000624697|protein_coding||||||||||rs2298214|1296|1||SNV|HGNC|HGNC:4053|||3|A2||ENSP00000485643||A0A096LPJ4||1||||||0.4938|0.0356|0.6268|0.7718|0.5895|0.6339|0.1386|0.5695|0.6136|0.1208|0.7139|0.6344|0.786|0.5555|0.595|0.5882|0.6625|0.7
86|gnomAD_EAS|||||||||||||||||||6|-37|6|24|0.00|0.00|0.01|0.02|HES4,A|regulatory_region_variant|MODIFIER|||RegulatoryFeature|ENSR00000000118|promoter||||||||||rs2298214||||SNV||||||||||||||||||0.4938|0.0356|0.6268|0.7718|0.5895|0.6339|0.1386|0.5695|0.6136|0.1208|0.7139|0.6344|0.786|0.5555|0.595|0.5882|0.6625|0.786|gnomAD_EAS|||||||||||||||||||||||||||;Allele=A,A,A,A,A,A,A,A;Consequence=intron_variant,missense_variant,non_coding_transcript_exon_variant,intron_variant,upstream_gene_variant,upstream_gene_variant,upstream_gene_variant,regulatory_region_variant;IMPACT=MODIFIER,MODERATE,MODIFIER,MODIFIER,MODIFIER,MODIFIER,MODIFIER,MODIFIER;SYMBOL=HES4,HES4,HES4,HES4,AL645608.7,ISG15,ISG15,.;Gene=ENSG00000188290,ENSG00000188290,ENSG00000188290,ENSG00000188290,ENSG00000272512,ENSG00000187608,ENSG00000187608,.;Feature_type=Transcript,Transcript,Transcript,Transcript,Transcript,Transcript,Transcript,RegulatoryFeature;Feature=ENST00000304952,ENST00000428771,ENST00000481869,ENST00000484667,ENST00000606034,ENST00000624652,ENST00000624697,ENSR00000000118;BIOTYPE=protein_coding,protein_coding,retained_intron,protein_coding,lncRNA,protein_coding,protein_coding,promoter;EXON=.,1/3,1/2,.,.,.,.,.;INTRON=1/3,.,.,1/2,.,.,.,.;HGVSc=ENST00000304952.11:c.108+24G>T,ENST00000428771.6:c.132G>T,ENST00000481869.1:n.255G>T,ENST00000484667.2:c.108+24G>T,.,.,.,.;HGVSp=.,ENSP00000393198.2:p.Arg44Ser,.,.,.,.,.,.;cDNA_position=.,331,255,.,.,.,.,.;CDS_position=.,132,.,.,.,.,.,.;Protein_position=.,44,.,.,.,.,.,.;Amino_acids=.,R/S,.,.,.,.,.,.;Codons=.,agG/agT,.,.,.,.,.,.;Existing_variation=rs2298214,rs2298214,rs2298214,rs2298214,rs2298214,rs2298214,rs2298214,rs2298214;DISTANCE=.,.,.,.,1791,1303,1296,.;STRAND=-1,-1,-1,-1,-1,1,1,.;FLAGS=.,.,.,.,.,cds_end_NF,.,.;VARIANT_CLASS=SNV,SNV,SNV,SNV,SNV,SNV,SNV,SNV;SYMBOL_SOURCE=HGNC,HGNC,HGNC,HGNC,Clone_based_ensembl_gene,HGNC,HGNC,.;HGNC_ID=HGNC:24149,HGNC:24149,HGNC:24149,HGNC:24149,.,HGNC:4053,HGNC:4053,.;CANONICAL=.,YES,.,.,YES,.,.,.;MANE=NM_021170.4,.,.,.,.,
.,.,.;TSL=1,2,2,3,.,3,3,.;APPRIS=P1,.,.,.,.,.,A2,.;CCDS=CCDS5.1,CCDS44034.1,.,.,.,.,.,.;ENSP=ENSP00000304595,ENSP00000393198,.,ENSP00000425085,.,ENSP00000485313,ENSP00000485643,.;SWISSPROT=Q9HCC6,.,.,.,.,.,.,.;TREMBL=.,E9PB28,.,D6REB3,.,A0A096LNZ9,A0A096LPJ4,.;UNIPARC=.,UPI0001881B51,.,UPI0001D3BBEE,.,.,.,.;GENE_PHENO=.,.,.,.,.,1,1,.;SIFT=.,tolerated_low_confidence(0.72),.,.,.,.,.,.;PolyPhen=.,benign(0),.,.,.,.,.,.;DOMAINS=.,MobiDB_lite:mobidb-lite&Low_complexity_(Seg):seg,.,.,.,.,.,.;miRNA=.,.,.,.,.,.,.,.;HGVS_OFFSET=.,.,.,.,.,.,.,.;AF=0.4938,0.4938,0.4938,0.4938,0.4938,0.4938,0.4938,0.4938;AFR_AF=0.0356,0.0356,0.0356,0.0356,0.0356,0.0356,0.0356,0.0356;AMR_AF=0.6268,0.6268,0.6268,0.6268,0.6268,0.6268,0.6268,0.6268;EAS_AF=0.7718,0.7718,0.7718,0.7718,0.7718,0.7718,0.7718,0.7718;EUR_AF=0.5895,0.5895,0.5895,0.5895,0.5895,0.5895,0.5895,0.5895;SAS_AF=0.6339,0.6339,0.6339,0.6339,0.6339,0.6339,0.6339,0.6339;AA_AF=0.1386,0.1386,0.1386,0.1386,0.1386,0.1386,0.1386,0.1386;EA_AF=0.5695,0.5695,0.5695,0.5695,0.5695,0.5695,0.5695,0.5695;gnomAD_AF=0.6136,0.6136,0.6136,0.6136,0.6136,0.6136,0.6136,0.6136;gnomAD_AFR_AF=0.1208,0.1208,0.1208,0.1208,0.1208,0.1208,0.1208,0.1208;gnomAD_AMR_AF=0.7139,0.7139,0.7139,0.7139,0.7139,0.7139,0.7139,0.7139;gnomAD_ASJ_AF=0.6344,0.6344,0.6344,0.6344,0.6344,0.6344,0.6344,0.6344;gnomAD_EAS_AF=0.786,0.786,0.786,0.786,0.786,0.786,0.786,0.786;gnomAD_FIN_AF=0.5555,0.5555,0.5555,0.5555,0.5555,0.5555,0.5555,0.5555;gnomAD_NFE_AF=0.595,0.595,0.595,0.595,0.595,0.595,0.595,0.595;gnomAD_OTH_AF=0.5882,0.5882,0.5882,0.5882,0.5882,0.5882,0.5882,0.5882;gnomAD_SAS_AF=0.6625,0.6625,0.6625,0.6625,0.6625,0.6625,0.6625,0.6625;MAX_AF=0.786,0.786,0.786,0.786,0.786,0.786,0.786,0.786;MAX_AF_POPS=gnomAD_EAS,gnomAD_EAS,gnomAD_EAS,gnomAD_EAS,gnomAD_EAS,gnomAD_EAS,gnomAD_EAS,gnomAD_EAS;CLIN_SIG=.,.,.,.,.,.,.,.;SOMATIC=.,.,.,.,.,.,.,.;PHENO=.,.,.,.,.,.,.,.;PUBMED=.,.,.,.,.,.,.,.;MOTIF_NAME=.,.,.,.,.,.,.,.;MOTIF_POS=.,.,.,.,.,.,.,.;HIGH_INF_POS=.,.,.,.,.,.,.,.;MOTIF_SCORE_CHANGE=.,
.,.,.,.,.,.,.;existing_InFrame_oORFs=.,.,.,.,.,.,.,.;existing_OutOfFrame_oORFs=.,.,.,.,.,.,.,.;existing_uORFs=.,.,.,.,.,.,.,.;five_prime_UTR_variant_annotation=.,.,.,.,.,.,.,.;five_prime_UTR_variant_consequence=.,.,.,.,.,.,.,.;SpliceRegion=.,.,.,.,.,.,.,.;LoF=.,.,.,.,.,.,.,.;LoF_filter=.,.,.,.,.,.,.,.;LoF_flags=.,.,.,.,.,.,.,.;LoF_info=.,MUTANT_DONOR_MES:11.0784944978449&EXON_END:1000172&INTRON_END:999691&DE_NOVO_DONOR_MES_POS:-156&DE_NOVO_DONOR_POS:-156&EXON_START:999692&DE_NOVO_DONOR_MES:7.0006176073036&DE_NOVO_DONOR_PROB:0.167156964545091&INTRON_START:999614,.,.,.,.,.,.;SpliceAI_pred_DP_AG=6,6,6,6,6,6,6,.;SpliceAI_pred_DP_AL=-37,-37,-37,-37,-37,-37,-37,.;SpliceAI_pred_DP_DG=6,6,6,6,6,6,6,.;SpliceAI_pred_DP_DL=24,24,24,24,24,24,24,.;SpliceAI_pred_DS_AG=0.00,0.00,0.00,0.00,0.00,0.00,0.00,.;SpliceAI_pred_DS_AL=0.00,0.00,0.00,0.00,0.00,0.00,0.00,.;SpliceAI_pred_DS_DG=0.01,0.01,0.01,0.01,0.01,0.01,0.01,.;SpliceAI_pred_DS_DL=0.02,0.02,0.02,0.02,0.02,0.02,0.02,.;SpliceAI_pred_SYMBOL=HES4,HES4,HES4,HES4,HES4,HES4,HES4,. +1 979496 . T C . . 
CSQ=C|missense_variant|MODERATE|PERM1|ENSG00000187642|Transcript|ENST00000341290|protein_coding|3/5||ENST00000341290.6:c.1192A>G|ENSP00000343864.2:p.Ser398Gly|1228|1192|398|S/G|Agc/Ggc|rs13302983||-1||SNV|HGNC|HGNC:28208|||2|A2||ENSP00000343864|Q5SV97||||tolerated(0.2)|benign(0.003)|PANTHER:PTHR47282&MobiDB_lite:mobidb-lite||||0.9418|0.9539|0.999|0.9592|0.9847|||0.9721|0.9533|0.9754|0.9166|0.9999|0.9923|0.9677|0.9536|0.9811|0.9999|gnomAD_EAS|||||||||||||||||||-27|-2|23|25|0.00|0.00|0.00|0.00|PERM1,C|downstream_gene_variant|MODIFIER|PLEKHN1|ENSG00000187583|Transcript|ENST00000379407|protein_coding||||||||||rs13302983|4488|1||SNV|HGNC|HGNC:25284|||1|A2|CCDS53256.1|ENSP00000368717|Q494U1||||||||||0.9418|0.9539|0.999|0.9592|0.9847|||0.9721|0.9533|0.9754|0.9166|0.9999|0.9923|0.9677|0.9536|0.9811|0.9999|gnomAD_EAS|||||||||||||||||||-27|-2|23|25|0.00|0.00|0.00|0.00|PERM1,C|downstream_gene_variant|MODIFIER|PLEKHN1|ENSG00000187583|Transcript|ENST00000379409|protein_coding||||||||||rs13302983|4488|1||SNV|HGNC|HGNC:25284|||2|A2||ENSP00000368719|Q494U1||||||||||0.9418|0.9539|0.999|0.9592|0.9847|||0.9721|0.9533|0.9754|0.9166|0.9999|0.9923|0.9677|0.9536|0.9811|0.9999|gnomAD_EAS|||||||||||||||||||-27|-2|23|25|0.00|0.00|0.00|0.00|PERM1,C|downstream_gene_variant|MODIFIER|PLEKHN1|ENSG00000187583|Transcript|ENST00000379410|protein_coding||||||||||rs13302983|3631|1||SNV|HGNC|HGNC:25284|YES|NM_032129.3|1|P3|CCDS4.1|ENSP00000368720|Q494U1||UPI00001416D8||||||||0.9418|0.9539|0.999|0.9592|0.9847|||0.9721|0.9533|0.9754|0.9166|0.9999|0.9923|0.9677|0.9536|0.9811|0.9999|gnomAD_EAS|||||||||||||||||||-27|-2|23|25|0.00|0.00|0.00|0.00|PERM1,C|missense_variant|MODERATE|PERM1|ENSG00000187642|Transcript|ENST00000433179|protein_coding|1/3||ENST00000433179.3:c.1534A>G|ENSP00000414022.3:p.Ser512Gly|1534|1534|512|S/G|Agc/Ggc|rs13302983||-1||SNV|HGNC|HGNC:28208|YES||5|P2|CCDS76083.1|ENSP00000414022|Q5SV97||||tolerated(0.18)|benign(0.011)|PANTHER:PTHR47282&MobiDB_lite:mobidb-lite||||0.9418|0.9539|0.999|0.9
592|0.9847|||0.9721|0.9533|0.9754|0.9166|0.9999|0.9923|0.9677|0.9536|0.9811|0.9999|gnomAD_EAS|||||||||||||||||||-27|-2|23|25|0.00|0.00|0.00|0.00|PERM1,C|upstream_gene_variant|MODIFIER|PERM1|ENSG00000187642|Transcript|ENST00000479361|retained_intron||||||||||rs13302983|2855|-1||SNV|HGNC|HGNC:28208|||1||||||||||||||0.9418|0.9539|0.999|0.9592|0.9847|||0.9721|0.9533|0.9754|0.9166|0.9999|0.9923|0.9677|0.9536|0.9811|0.9999|gnomAD_EAS|||||||||||||||||||-27|-2|23|25|0.00|0.00|0.00|0.00|PERM1,C|downstream_gene_variant|MODIFIER|PLEKHN1|ENSG00000187583|Transcript|ENST00000491024|protein_coding||||||||||rs13302983|3631|1|cds_start_NF|SNV|HGNC|HGNC:25284|||3|||ENSP00000462558||J3KSM5|UPI000268AE1F||||||||0.9418|0.9539|0.999|0.9592|0.9847|||0.9721|0.9533|0.9754|0.9166|0.9999|0.9923|0.9677|0.9536|0.9811|0.9999|gnomAD_EAS|||||||||||||||||||-27|-2|23|25|0.00|0.00|0.00|0.00|PERM1;Allele=C,C,C,C,C,C,C;Consequence=missense_variant,downstream_gene_variant,downstream_gene_variant,downstream_gene_variant,missense_variant,upstream_gene_variant,downstream_gene_variant;IMPACT=MODERATE,MODIFIER,MODIFIER,MODIFIER,MODERATE,MODIFIER,MODIFIER;SYMBOL=PERM1,PLEKHN1,PLEKHN1,PLEKHN1,PERM1,PERM1,PLEKHN1;Gene=ENSG00000187642,ENSG00000187583,ENSG00000187583,ENSG00000187583,ENSG00000187642,ENSG00000187642,ENSG00000187583;Feature_type=Transcript,Transcript,Transcript,Transcript,Transcript,Transcript,Transcript;Feature=ENST00000341290,ENST00000379407,ENST00000379409,ENST00000379410,ENST00000433179,ENST00000479361,ENST00000491024;BIOTYPE=protein_coding,protein_coding,protein_coding,protein_coding,protein_coding,retained_intron,protein_coding;EXON=3/5,.,.,.,1/3,.,.;INTRON=.,.,.,.,.,.,.;HGVSc=ENST00000341290.6:c.1192A>G,.,.,.,ENST00000433179.3:c.1534A>G,.,.;HGVSp=ENSP00000343864.2:p.Ser398Gly,.,.,.,ENSP00000414022.3:p.Ser512Gly,.,.;cDNA_position=1228,.,.,.,1534,.,.;CDS_position=1192,.,.,.,1534,.,.;Protein_position=398,.,.,.,512,.,.;Amino_acids=S/G,.,.,.,S/G,.,.;Codons=Agc/Ggc,.,.,.,Agc/Ggc,.,.;Existing_variat
ion=rs13302983,rs13302983,rs13302983,rs13302983,rs13302983,rs13302983,rs13302983;DISTANCE=.,4488,4488,3631,.,2855,3631;STRAND=-1,1,1,1,-1,-1,1;FLAGS=.,.,.,.,.,.,cds_start_NF;VARIANT_CLASS=SNV,SNV,SNV,SNV,SNV,SNV,SNV;SYMBOL_SOURCE=HGNC,HGNC,HGNC,HGNC,HGNC,HGNC,HGNC;HGNC_ID=HGNC:28208,HGNC:25284,HGNC:25284,HGNC:25284,HGNC:28208,HGNC:28208,HGNC:25284;CANONICAL=.,.,.,YES,YES,.,.;MANE=.,.,.,NM_032129.3,.,.,.;TSL=2,1,2,1,5,1,3;APPRIS=A2,A2,A2,P3,P2,.,.;CCDS=.,CCDS53256.1,.,CCDS4.1,CCDS76083.1,.,.;ENSP=ENSP00000343864,ENSP00000368717,ENSP00000368719,ENSP00000368720,ENSP00000414022,.,ENSP00000462558;SWISSPROT=Q5SV97,Q494U1,Q494U1,Q494U1,Q5SV97,.,.;TREMBL=.,.,.,.,.,.,J3KSM5;UNIPARC=.,.,.,UPI00001416D8,.,.,UPI000268AE1F;GENE_PHENO=.,.,.,.,.,.,.;SIFT=tolerated(0.2),.,.,.,tolerated(0.18),.,.;PolyPhen=benign(0.003),.,.,.,benign(0.011),.,.;DOMAINS=PANTHER:PTHR47282&MobiDB_lite:mobidb-lite,.,.,.,PANTHER:PTHR47282&MobiDB_lite:mobidb-lite,.,.;miRNA=.,.,.,.,.,.,.;HGVS_OFFSET=.,.,.,.,.,.,.;AF=.,.,.,.,.,.,.;AFR_AF=0.9418,0.9418,0.9418,0.9418,0.9418,0.9418,0.9418;AMR_AF=0.9539,0.9539,0.9539,0.9539,0.9539,0.9539,0.9539;EAS_AF=0.999,0.999,0.999,0.999,0.999,0.999,0.999;EUR_AF=0.9592,0.9592,0.9592,0.9592,0.9592,0.9592,0.9592;SAS_AF=0.9847,0.9847,0.9847,0.9847,0.9847,0.9847,0.9847;AA_AF=.,.,.,.,.,.,.;EA_AF=.,.,.,.,.,.,.;gnomAD_AF=0.9721,0.9721,0.9721,0.9721,0.9721,0.9721,0.9721;gnomAD_AFR_AF=0.9533,0.9533,0.9533,0.9533,0.9533,0.9533,0.9533;gnomAD_AMR_AF=0.9754,0.9754,0.9754,0.9754,0.9754,0.9754,0.9754;gnomAD_ASJ_AF=0.9166,0.9166,0.9166,0.9166,0.9166,0.9166,0.9166;gnomAD_EAS_AF=0.9999,0.9999,0.9999,0.9999,0.9999,0.9999,0.9999;gnomAD_FIN_AF=0.9923,0.9923,0.9923,0.9923,0.9923,0.9923,0.9923;gnomAD_NFE_AF=0.9677,0.9677,0.9677,0.9677,0.9677,0.9677,0.9677;gnomAD_OTH_AF=0.9536,0.9536,0.9536,0.9536,0.9536,0.9536,0.9536;gnomAD_SAS_AF=0.9811,0.9811,0.9811,0.9811,0.9811,0.9811,0.9811;MAX_AF=0.9999,0.9999,0.9999,0.9999,0.9999,0.9999,0.9999;MAX_AF_POPS=gnomAD_EAS,gnomAD_EAS,gnomAD_EAS,gnomAD_EAS,gnomAD_EA
S,gnomAD_EAS,gnomAD_EAS;CLIN_SIG=.,.,.,.,.,.,.;SOMATIC=.,.,.,.,.,.,.;PHENO=.,.,.,.,.,.,.;PUBMED=.,.,.,.,.,.,.;MOTIF_NAME=.,.,.,.,.,.,.;MOTIF_POS=.,.,.,.,.,.,.;HIGH_INF_POS=.,.,.,.,.,.,.;MOTIF_SCORE_CHANGE=.,.,.,.,.,.,.;existing_InFrame_oORFs=.,.,.,.,.,.,.;existing_OutOfFrame_oORFs=.,.,.,.,.,.,.;existing_uORFs=.,.,.,.,.,.,.;five_prime_UTR_variant_annotation=.,.,.,.,.,.,.;five_prime_UTR_variant_consequence=.,.,.,.,.,.,.;SpliceRegion=.,.,.,.,.,.,.;LoF=.,.,.,.,.,.,.;LoF_filter=.,.,.,.,.,.,.;LoF_flags=.,.,.,.,.,.,.;LoF_info=.,.,.,.,.,.,.;SpliceAI_pred_DP_AG=-27,-27,-27,-27,-27,-27,-27;SpliceAI_pred_DP_AL=-2,-2,-2,-2,-2,-2,-2;SpliceAI_pred_DP_DG=23,23,23,23,23,23,23;SpliceAI_pred_DP_DL=25,25,25,25,25,25,25;SpliceAI_pred_DS_AG=0,0,0,0,0,0,0;SpliceAI_pred_DS_AL=0,0,0,0,0,0,0;SpliceAI_pred_DS_DG=0,0,0,0,0,0,0;SpliceAI_pred_DS_DL=0,0,0,0,0,0,0;SpliceAI_pred_SYMBOL=PERM1,PERM1,PERM1,PERM1,PERM1,PERM1,PERM1 +1 979560 . T C . . CSQ=C|synonymous_variant|LOW|PERM1|ENSG00000187642|Transcript|ENST00000341290|protein_coding|3/5||ENST00000341290.6:c.1128A>G|ENSP00000343864.2:p.Ala376%3D|1164|1128|376|A|gcA/gcG|rs13303033||-1||SNV|HGNC|HGNC:28208|||2|A2||ENSP00000343864|Q5SV97||||||PANTHER:PTHR47282&Low_complexity_(Seg):seg|||0.5096|0.177|0.6095|0.7123|0.5895|0.5971|||0.5887|0.2428|0.6604|0.5617|0.7237|0.5334|0.5801|0.5647|0.6213|0.7237|gnomAD_EAS|||||||||||||||||||28|-30|-39|3|0.00|0.00|0.00|0.00|PERM1,C|downstream_gene_variant|MODIFIER|PLEKHN1|ENSG00000187583|Transcript|ENST00000379407|protein_coding||||||||||rs13303033|4552|1||SNV|HGNC|HGNC:25284|||1|A2|CCDS53256.1|ENSP00000368717|Q494U1|||||||||0.5096|0.177|0.6095|0.7123|0.5895|0.5971|||0.5887|0.2428|0.6604|0.5617|0.7237|0.5334|0.5801|0.5647|0.6213|0.7237|gnomAD_EAS|||||||||||||||||||28|-30|-39|3|0.00|0.00|0.00|0.00|PERM1,C|downstream_gene_variant|MODIFIER|PLEKHN1|ENSG00000187583|Transcript|ENST00000379409|protein_coding||||||||||rs13303033|4552|1||SNV|HGNC|HGNC:25284|||2|A2||ENSP00000368719|Q494U1|||||||||0.5096|0.177|0.6095|0.712
3|0.5895|0.5971|||0.5887|0.2428|0.6604|0.5617|0.7237|0.5334|0.5801|0.5647|0.6213|0.7237|gnomAD_EAS|||||||||||||||||||28|-30|-39|3|0.00|0.00|0.00|0.00|PERM1,C|downstream_gene_variant|MODIFIER|PLEKHN1|ENSG00000187583|Transcript|ENST00000379410|protein_coding||||||||||rs13303033|3695|1||SNV|HGNC|HGNC:25284|YES|NM_032129.3|1|P3|CCDS4.1|ENSP00000368720|Q494U1||UPI00001416D8|||||||0.5096|0.177|0.6095|0.7123|0.5895|0.5971|||0.5887|0.2428|0.6604|0.5617|0.7237|0.5334|0.5801|0.5647|0.6213|0.7237|gnomAD_EAS|||||||||||||||||||28|-30|-39|3|0.00|0.00|0.00|0.00|PERM1,C|synonymous_variant|LOW|PERM1|ENSG00000187642|Transcript|ENST00000433179|protein_coding|1/3||ENST00000433179.3:c.1470A>G|ENSP00000414022.3:p.Ala490%3D|1470|1470|490|A|gcA/gcG|rs13303033||-1||SNV|HGNC|HGNC:28208|YES||5|P2|CCDS76083.1|ENSP00000414022|Q5SV97||||||PANTHER:PTHR47282&Low_complexity_(Seg):seg|||0.5096|0.177|0.6095|0.7123|0.5895|0.5971|||0.5887|0.2428|0.6604|0.5617|0.7237|0.5334|0.5801|0.5647|0.6213|0.7237|gnomAD_EAS|||||||||||||||||||28|-30|-39|3|0.00|0.00|0.00|0.00|PERM1,C|upstream_gene_variant|MODIFIER|PERM1|ENSG00000187642|Transcript|ENST00000479361|retained_intron||||||||||rs13303033|2919|-1||SNV|HGNC|HGNC:28208|||1|||||||||||||0.5096|0.177|0.6095|0.7123|0.5895|0.5971|||0.5887|0.2428|0.6604|0.5617|0.7237|0.5334|0.5801|0.5647|0.6213|0.7237|gnomAD_EAS|||||||||||||||||||28|-30|-39|3|0.00|0.00|0.00|0.00|PERM1,C|downstream_gene_variant|MODIFIER|PLEKHN1|ENSG00000187583|Transcript|ENST00000491024|protein_coding||||||||||rs13303033|3695|1|cds_start_NF|SNV|HGNC|HGNC:25284|||3|||ENSP00000462558||J3KSM5|UPI000268AE1F|||||||0.5096|0.177|0.6095|0.7123|0.5895|0.5971|||0.5887|0.2428|0.6604|0.5617|0.7237|0.5334|0.5801|0.5647|0.6213|0.7237|gnomAD_EAS|||||||||||||||||||28|-30|-39|3|0.00|0.00|0.00|0.00|PERM1;Allele=C,C,C,C,C,C,C;Consequence=synonymous_variant,downstream_gene_variant,downstream_gene_variant,downstream_gene_variant,synonymous_variant,upstream_gene_variant,downstream_gene_variant;IMPACT=LOW,MODIFIER,MODIFIER
,MODIFIER,LOW,MODIFIER,MODIFIER;SYMBOL=PERM1,PLEKHN1,PLEKHN1,PLEKHN1,PERM1,PERM1,PLEKHN1;Gene=ENSG00000187642,ENSG00000187583,ENSG00000187583,ENSG00000187583,ENSG00000187642,ENSG00000187642,ENSG00000187583;Feature_type=Transcript,Transcript,Transcript,Transcript,Transcript,Transcript,Transcript;Feature=ENST00000341290,ENST00000379407,ENST00000379409,ENST00000379410,ENST00000433179,ENST00000479361,ENST00000491024;BIOTYPE=protein_coding,protein_coding,protein_coding,protein_coding,protein_coding,retained_intron,protein_coding;EXON=3/5,.,.,.,1/3,.,.;INTRON=.,.,.,.,.,.,.;HGVSc=ENST00000341290.6:c.1128A>G,.,.,.,ENST00000433179.3:c.1470A>G,.,.;HGVSp=ENSP00000343864.2:p.Ala376%3D,.,.,.,ENSP00000414022.3:p.Ala490%3D,.,.;cDNA_position=1164,.,.,.,1470,.,.;CDS_position=1128,.,.,.,1470,.,.;Protein_position=376,.,.,.,490,.,.;Amino_acids=A,.,.,.,A,.,.;Codons=gcA/gcG,.,.,.,gcA/gcG,.,.;Existing_variation=rs13303033,rs13303033,rs13303033,rs13303033,rs13303033,rs13303033,rs13303033;DISTANCE=.,4552,4552,3695,.,2919,3695;STRAND=-1,1,1,1,-1,-1,1;FLAGS=.,.,.,.,.,.,cds_start_NF;VARIANT_CLASS=SNV,SNV,SNV,SNV,SNV,SNV,SNV;SYMBOL_SOURCE=HGNC,HGNC,HGNC,HGNC,HGNC,HGNC,HGNC;HGNC_ID=HGNC:28208,HGNC:25284,HGNC:25284,HGNC:25284,HGNC:28208,HGNC:28208,HGNC:25284;CANONICAL=.,.,.,YES,YES,.,.;MANE=.,.,.,NM_032129.3,.,.,.;TSL=2,1,2,1,5,1,3;APPRIS=A2,A2,A2,P3,P2,.,.;CCDS=.,CCDS53256.1,.,CCDS4.1,CCDS76083.1,.,.;ENSP=ENSP00000343864,ENSP00000368717,ENSP00000368719,ENSP00000368720,ENSP00000414022,.,ENSP00000462558;SWISSPROT=Q5SV97,Q494U1,Q494U1,Q494U1,Q5SV97,.,.;TREMBL=.,.,.,.,.,.,J3KSM5;UNIPARC=.,.,.,UPI00001416D8,.,.,UPI000268AE1F;GENE_PHENO=.,.,.,.,.,.,.;SIFT=.,.,.,.,.,.,.;PolyPhen=.,.,.,.,.,.,.;DOMAINS=PANTHER:PTHR47282&Low_complexity_(Seg):seg,.,.,.,PANTHER:PTHR47282&Low_complexity_(Seg):seg,.,.;miRNA=.,.,.,.,.,.,.;HGVS_OFFSET=.,.,.,.,.,.,.;AF=0.5096,0.5096,0.5096,0.5096,0.5096,0.5096,0.5096;AFR_AF=0.177,0.177,0.177,0.177,0.177,0.177,0.177;AMR_AF=0.6095,0.6095,0.6095,0.6095,0.6095,0.6095,0.6095;EAS_AF=0
.7123,0.7123,0.7123,0.7123,0.7123,0.7123,0.7123;EUR_AF=0.5895,0.5895,0.5895,0.5895,0.5895,0.5895,0.5895;SAS_AF=0.5971,0.5971,0.5971,0.5971,0.5971,0.5971,0.5971;AA_AF=.,.,.,.,.,.,.;EA_AF=.,.,.,.,.,.,.;gnomAD_AF=0.5887,0.5887,0.5887,0.5887,0.5887,0.5887,0.5887;gnomAD_AFR_AF=0.2428,0.2428,0.2428,0.2428,0.2428,0.2428,0.2428;gnomAD_AMR_AF=0.6604,0.6604,0.6604,0.6604,0.6604,0.6604,0.6604;gnomAD_ASJ_AF=0.5617,0.5617,0.5617,0.5617,0.5617,0.5617,0.5617;gnomAD_EAS_AF=0.7237,0.7237,0.7237,0.7237,0.7237,0.7237,0.7237;gnomAD_FIN_AF=0.5334,0.5334,0.5334,0.5334,0.5334,0.5334,0.5334;gnomAD_NFE_AF=0.5801,0.5801,0.5801,0.5801,0.5801,0.5801,0.5801;gnomAD_OTH_AF=0.5647,0.5647,0.5647,0.5647,0.5647,0.5647,0.5647;gnomAD_SAS_AF=0.6213,0.6213,0.6213,0.6213,0.6213,0.6213,0.6213;MAX_AF=0.7237,0.7237,0.7237,0.7237,0.7237,0.7237,0.7237;MAX_AF_POPS=gnomAD_EAS,gnomAD_EAS,gnomAD_EAS,gnomAD_EAS,gnomAD_EAS,gnomAD_EAS,gnomAD_EAS;CLIN_SIG=.,.,.,.,.,.,.;SOMATIC=.,.,.,.,.,.,.;PHENO=.,.,.,.,.,.,.;PUBMED=.,.,.,.,.,.,.;MOTIF_NAME=.,.,.,.,.,.,.;MOTIF_POS=.,.,.,.,.,.,.;HIGH_INF_POS=.,.,.,.,.,.,.;MOTIF_SCORE_CHANGE=.,.,.,.,.,.,.;existing_InFrame_oORFs=.,.,.,.,.,.,.;existing_OutOfFrame_oORFs=.,.,.,.,.,.,.;existing_uORFs=.,.,.,.,.,.,.;five_prime_UTR_variant_annotation=.,.,.,.,.,.,.;five_prime_UTR_variant_consequence=.,.,.,.,.,.,.;SpliceRegion=.,.,.,.,.,.,.;LoF=.,.,.,.,.,.,.;LoF_filter=.,.,.,.,.,.,.;LoF_flags=.,.,.,.,.,.,.;LoF_info=.,.,.,.,.,.,.;SpliceAI_pred_DP_AG=28,28,28,28,28,28,28;SpliceAI_pred_DP_AL=-30,-30,-30,-30,-30,-30,-30;SpliceAI_pred_DP_DG=-39,-39,-39,-39,-39,-39,-39;SpliceAI_pred_DP_DL=3,3,3,3,3,3,3;SpliceAI_pred_DS_AG=0,0,0,0,0,0,0;SpliceAI_pred_DS_AL=0,0,0,0,0,0,0;SpliceAI_pred_DS_DG=0,0,0,0,0,0,0;SpliceAI_pred_DS_DL=0,0,0,0,0,0,0;SpliceAI_pred_SYMBOL=PERM1,PERM1,PERM1,PERM1,PERM1,PERM1,PERM1 +1 999842 . C A . . 
CSQ=A|intron_variant|MODIFIER|HES4|ENSG00000188290|Transcript|ENST00000304952|protein_coding||1/3|ENST00000304952.11:c.108+24G>T|||||||rs2298214||-1||SNV|HGNC|HGNC:24149||NM_021170.4|1|P1|CCDS5.1|ENSP00000304595|Q9HCC6|||||||||0.4938|0.0356|0.6268|0.7718|0.5895|0.6339|0.1386|0.5695|0.6136|0.1208|0.7139|0.6344|0.786|0.5555|0.595|0.5882|0.6625|0.786|gnomAD_EAS|||||||||||||||||||6|-37|6|24|0.00|0.00|0.01|0.02|HES4,A|missense_variant|MODERATE|HES4|ENSG00000188290|Transcript|ENST00000428771|protein_coding|1/3||ENST00000428771.6:c.132G>T|ENSP00000393198.2:p.Arg44Ser|331|132|44|R/S|agG/agT|rs2298214||-1||SNV|HGNC|HGNC:24149|YES||2||CCDS44034.1|ENSP00000393198||E9PB28|UPI0001881B51||tolerated_low_confidence(0.72)|benign(0)|MobiDB_lite:mobidb-lite&Low_complexity_(Seg):seg|||0.4938|0.0356|0.6268|0.7718|0.5895|0.6339|0.1386|0.5695|0.6136|0.1208|0.7139|0.6344|0.786|0.5555|0.595|0.5882|0.6625|0.786|gnomAD_EAS||||||||||||||||||MUTANT_DONOR_MES:11.0784944978449&EXON_END:1000172&INTRON_END:999691&DE_NOVO_DONOR_MES_POS:-156&DE_NOVO_DONOR_POS:-156&EXON_START:999692&DE_NOVO_DONOR_MES:7.0006176073036&DE_NOVO_DONOR_PROB:0.167156964545091&INTRON_START:999614|6|-37|6|24|0.00|0.00|0.01|0.02|HES4,A|non_coding_transcript_exon_variant|MODIFIER|HES4|ENSG00000188290|Transcript|ENST00000481869|retained_intron|1/2||ENST00000481869.1:n.255G>T||255|||||rs2298214||-1||SNV|HGNC|HGNC:24149|||2|||||||||||||0.4938|0.0356|0.6268|0.7718|0.5895|0.6339|0.1386|0.5695|0.6136|0.1208|0.7139|0.6344|0.786|0.5555|0.595|0.5882|0.6625|0.786|gnomAD_EAS|||||||||||||||||||6|-37|6|24|0.00|0.00|0.01|0.02|HES4,A|intron_variant|MODIFIER|HES4|ENSG00000188290|Transcript|ENST00000484667|protein_coding||1/2|ENST00000484667.2:c.108+24G>T|||||||rs2298214||-1||SNV|HGNC|HGNC:24149|||3|||ENSP00000425085||D6REB3|UPI0001D3BBEE|||||||0.4938|0.0356|0.6268|0.7718|0.5895|0.6339|0.1386|0.5695|0.6136|0.1208|0.7139|0.6344|0.786|0.5555|0.595|0.5882|0.6625|0.786|gnomAD_EAS|||||||||||||||||||6|-37|6|24|0.00|0.00|0.01|0.02|HES4,A|upstream_gene_
variant|MODIFIER|AL645608.7|ENSG00000272512|Transcript|ENST00000606034|lncRNA||||||||||rs2298214|1791|-1||SNV|Clone_based_ensembl_gene||YES|||||||||||||||0.4938|0.0356|0.6268|0.7718|0.5895|0.6339|0.1386|0.5695|0.6136|0.1208|0.7139|0.6344|0.786|0.5555|0.595|0.5882|0.6625|0.786|gnomAD_EAS|||||||||||||||||||6|-37|6|24|0.00|0.00|0.01|0.02|HES4,A|upstream_gene_variant|MODIFIER|ISG15|ENSG00000187608|Transcript|ENST00000624652|protein_coding||||||||||rs2298214|1303|1|cds_end_NF|SNV|HGNC|HGNC:4053|||3|||ENSP00000485313||A0A096LNZ9||1||||||0.4938|0.0356|0.6268|0.7718|0.5895|0.6339|0.1386|0.5695|0.6136|0.1208|0.7139|0.6344|0.786|0.5555|0.595|0.5882|0.6625|0.786|gnomAD_EAS|||||||||||||||||||6|-37|6|24|0.00|0.00|0.01|0.02|HES4,A|upstream_gene_variant|MODIFIER|ISG15|ENSG00000187608|Transcript|ENST00000624697|protein_coding||||||||||rs2298214|1296|1||SNV|HGNC|HGNC:4053|||3|A2||ENSP00000485643||A0A096LPJ4||1||||||0.4938|0.0356|0.6268|0.7718|0.5895|0.6339|0.1386|0.5695|0.6136|0.1208|0.7139|0.6344|0.786|0.5555|0.595|0.5882|0.6625|0.786|gnomAD_EAS|||||||||||||||||||6|-37|6|24|0.00|0.00|0.01|0.02|HES4,A|regulatory_region_variant|MODIFIER|||RegulatoryFeature|ENSR00000000118|promoter||||||||||rs2298214||||SNV||||||||||||||||||0.4938|0.0356|0.6268|0.7718|0.5895|0.6339|0.1386|0.5695|0.6136|0.1208|0.7139|0.6344|0.786|0.5555|0.595|0.5882|0.6625|0.786|gnomAD_EAS|||||||||||||||||||||||||||;Allele=A,A,A,A,A,A,A,A;Consequence=intron_variant,missense_variant,non_coding_transcript_exon_variant,intron_variant,upstream_gene_variant,upstream_gene_variant,upstream_gene_variant,regulatory_region_variant;IMPACT=MODIFIER,MODERATE,MODIFIER,MODIFIER,MODIFIER,MODIFIER,MODIFIER,MODIFIER;SYMBOL=HES4,HES4,HES4,HES4,AL645608.7,ISG15,ISG15,.;Gene=ENSG00000188290,ENSG00000188290,ENSG00000188290,ENSG00000188290,ENSG00000272512,ENSG00000187608,ENSG00000187608,.;Feature_type=Transcript,Transcript,Transcript,Transcript,Transcript,Transcript,Transcript,RegulatoryFeature;Feature=ENST00000304952,ENST00000428771,ENST000
00481869,ENST00000484667,ENST00000606034,ENST00000624652,ENST00000624697,ENSR00000000118;BIOTYPE=protein_coding,protein_coding,retained_intron,protein_coding,lncRNA,protein_coding,protein_coding,promoter;EXON=.,1/3,1/2,.,.,.,.,.;INTRON=1/3,.,.,1/2,.,.,.,.;HGVSc=ENST00000304952.11:c.108+24G>T,ENST00000428771.6:c.132G>T,ENST00000481869.1:n.255G>T,ENST00000484667.2:c.108+24G>T,.,.,.,.;HGVSp=.,ENSP00000393198.2:p.Arg44Ser,.,.,.,.,.,.;cDNA_position=.,331,255,.,.,.,.,.;CDS_position=.,132,.,.,.,.,.,.;Protein_position=.,44,.,.,.,.,.,.;Amino_acids=.,R/S,.,.,.,.,.,.;Codons=.,agG/agT,.,.,.,.,.,.;Existing_variation=rs2298214,rs2298214,rs2298214,rs2298214,rs2298214,rs2298214,rs2298214,rs2298214;DISTANCE=.,.,.,.,1791,1303,1296,.;STRAND=-1,-1,-1,-1,-1,1,1,.;FLAGS=.,.,.,.,.,cds_end_NF,.,.;VARIANT_CLASS=SNV,SNV,SNV,SNV,SNV,SNV,SNV,SNV;SYMBOL_SOURCE=HGNC,HGNC,HGNC,HGNC,Clone_based_ensembl_gene,HGNC,HGNC,.;HGNC_ID=HGNC:24149,HGNC:24149,HGNC:24149,HGNC:24149,.,HGNC:4053,HGNC:4053,.;CANONICAL=.,YES,.,.,YES,.,.,.;MANE=NM_021170.4,.,.,.,.,.,.,.;TSL=1,2,2,3,.,3,3,.;APPRIS=P1,.,.,.,.,.,A2,.;CCDS=CCDS5.1,CCDS44034.1,.,.,.,.,.,.;ENSP=ENSP00000304595,ENSP00000393198,.,ENSP00000425085,.,ENSP00000485313,ENSP00000485643,.;SWISSPROT=Q9HCC6,.,.,.,.,.,.,.;TREMBL=.,E9PB28,.,D6REB3,.,A0A096LNZ9,A0A096LPJ4,.;UNIPARC=.,UPI0001881B51,.,UPI0001D3BBEE,.,.,.,.;GENE_PHENO=.,.,.,.,.,1,1,.;SIFT=.,tolerated_low_confidence(0.72),.,.,.,.,.,.;PolyPhen=.,benign(0),.,.,.,.,.,.;DOMAINS=.,MobiDB_lite:mobidb-lite&Low_complexity_(Seg):seg,.,.,.,.,.,.;miRNA=.,.,.,.,.,.,.,.;HGVS_OFFSET=.,.,.,.,.,.,.,.;AF=0.4938,0.4938,0.4938,0.4938,0.4938,0.4938,0.4938,0.4938;AFR_AF=0.0356,0.0356,0.0356,0.0356,0.0356,0.0356,0.0356,0.0356;AMR_AF=0.6268,0.6268,0.6268,0.6268,0.6268,0.6268,0.6268,0.6268;EAS_AF=0.7718,0.7718,0.7718,0.7718,0.7718,0.7718,0.7718,0.7718;EUR_AF=0.5895,0.5895,0.5895,0.5895,0.5895,0.5895,0.5895,0.5895;SAS_AF=0.6339,0.6339,0.6339,0.6339,0.6339,0.6339,0.6339,0.6339;AA_AF=0.1386,0.1386,0.1386,0.1386,0.1386,0.1386,0.1386
,0.1386;EA_AF=0.5695,0.5695,0.5695,0.5695,0.5695,0.5695,0.5695,0.5695;gnomAD_AF=0.6136,0.6136,0.6136,0.6136,0.6136,0.6136,0.6136,0.6136;gnomAD_AFR_AF=0.1208,0.1208,0.1208,0.1208,0.1208,0.1208,0.1208,0.1208;gnomAD_AMR_AF=0.7139,0.7139,0.7139,0.7139,0.7139,0.7139,0.7139,0.7139;gnomAD_ASJ_AF=0.6344,0.6344,0.6344,0.6344,0.6344,0.6344,0.6344,0.6344;gnomAD_EAS_AF=0.786,0.786,0.786,0.786,0.786,0.786,0.786,0.786;gnomAD_FIN_AF=0.5555,0.5555,0.5555,0.5555,0.5555,0.5555,0.5555,0.5555;gnomAD_NFE_AF=0.595,0.595,0.595,0.595,0.595,0.595,0.595,0.595;gnomAD_OTH_AF=0.5882,0.5882,0.5882,0.5882,0.5882,0.5882,0.5882,0.5882;gnomAD_SAS_AF=0.6625,0.6625,0.6625,0.6625,0.6625,0.6625,0.6625,0.6625;MAX_AF=0.786,0.786,0.786,0.786,0.786,0.786,0.786,0.786;MAX_AF_POPS=gnomAD_EAS,gnomAD_EAS,gnomAD_EAS,gnomAD_EAS,gnomAD_EAS,gnomAD_EAS,gnomAD_EAS,gnomAD_EAS;CLIN_SIG=.,.,.,.,.,.,.,.;SOMATIC=.,.,.,.,.,.,.,.;PHENO=.,.,.,.,.,.,.,.;PUBMED=.,.,.,.,.,.,.,.;MOTIF_NAME=.,.,.,.,.,.,.,.;MOTIF_POS=.,.,.,.,.,.,.,.;HIGH_INF_POS=.,.,.,.,.,.,.,.;MOTIF_SCORE_CHANGE=.,.,.,.,.,.,.,.;existing_InFrame_oORFs=.,.,.,.,.,.,.,.;existing_OutOfFrame_oORFs=.,.,.,.,.,.,.,.;existing_uORFs=.,.,.,.,.,.,.,.;five_prime_UTR_variant_annotation=.,.,.,.,.,.,.,.;five_prime_UTR_variant_consequence=.,.,.,.,.,.,.,.;SpliceRegion=.,.,.,.,.,.,.,.;LoF=.,.,.,.,.,.,.,.;LoF_filter=.,.,.,.,.,.,.,.;LoF_flags=.,.,.,.,.,.,.,.;LoF_info=.,MUTANT_DONOR_MES:11.0784944978449&EXON_END:1000172&INTRON_END:999691&DE_NOVO_DONOR_MES_POS:-156&DE_NOVO_DONOR_POS:-156&EXON_START:999692&DE_NOVO_DONOR_MES:7.0006176073036&DE_NOVO_DONOR_PROB:0.167156964545091&INTRON_START:999614,.,.,.,.,.,.;SpliceAI_pred_DP_AG=6,6,6,6,6,6,6,.;SpliceAI_pred_DP_AL=-37,-37,-37,-37,-37,-37,-37,.;SpliceAI_pred_DP_DG=6,6,6,6,6,6,6,.;SpliceAI_pred_DP_DL=24,24,24,24,24,24,24,.;SpliceAI_pred_DS_AG=0,0,0,0,0,0,0,.;SpliceAI_pred_DS_AL=0,0,0,0,0,0,0,.;SpliceAI_pred_DS_DG=0.01,0.01,0.01,0.01,0.01,0.01,0.01,.;SpliceAI_pred_DS_DL=0.02,0.02,0.02,0.02,0.02,0.02,0.02,.;SpliceAI_pred_SYMBOL=HES4,HES4,HES4,H
ES4,HES4,HES4,HES4,. 1 12716215 . C A . . CSQ=A|stop_gained|HIGH|AADACL3|ENSG00000188984|Transcript|ENST00000359318|protein_coding|1/4||ENST00000359318.8:c.39C>A|ENSP00000352268.6:p.Cys13Ter|106|39|13|C/*|tgC/tgA|rs3000860||1||SNV|HGNC|HGNC:32037|YES|NM_001103170.3|3|P1|CCDS41253.2|ENSP00000352268|Q5VUY0||UPI0003EAEC70||||PIRSF:PIRSF037251&PANTHER:PTHR23024&PANTHER:PTHR23024:SF115&Transmembrane_helices:TMhelix|||0.1436|0.1188|0.1052|0.2679|0.1362|0.0838||||||||||||0.2679|EAS|||||||||||||||HC|||GERP_DIST:92.57575&BP_DIST:1182&PERCENTILE:0.0318627450980392&DIST_FROM_LAST_EXON:408&50_BP_RULE:PASS&PHYLOCSF_TOO_SHORT|||||||||,A|non_coding_transcript_exon_variant|MODIFIER|AADACL3|ENSG00000188984|Transcript|ENST00000620146|processed_transcript|1/3||ENST00000620146.2:n.101C>A||101|||||rs3000860||1||SNV|HGNC|HGNC:32037|||2|||||||||||||0.1436|0.1188|0.1052|0.2679|0.1362|0.0838||||||||||||0.2679|EAS|||||||||||||||||||||||||||,A|regulatory_region_variant|MODIFIER|||RegulatoryFeature|ENSR00000347677|promoter_flanking_region||||||||||rs3000860||||SNV||||||||||||||||||0.1436|0.1188|0.1052|0.2679|0.1362|0.0838||||||||||||0.2679|EAS|||||||||||||||||||||||||||;Allele=A,A,A;Consequence=stop_gained,non_coding_transcript_exon_variant,regulatory_region_variant;IMPACT=HIGH,MODIFIER,MODIFIER;SYMBOL=AADACL3,AADACL3,.;Gene=ENSG00000188984,ENSG00000188984,.;Feature_type=Transcript,Transcript,RegulatoryFeature;Feature=ENST00000359318,ENST00000620146,ENSR00000347677;BIOTYPE=protein_coding,processed_transcript,promoter_flanking_region;EXON=1/4,1/3,.;INTRON=.,.,.;HGVSc=ENST00000359318.8:c.39C>A,ENST00000620146.2:n.101C>A,.;HGVSp=ENSP00000352268.6:p.Cys13Ter,.,.;cDNA_position=106,101,.;CDS_position=39,.,.;Protein_position=13,.,.;Amino_acids=C/*,.,.;Codons=tgC/tgA,.,.;Existing_variation=rs3000860,rs3000860,rs3000860;DISTANCE=.,.,.;STRAND=1,1,.;FLAGS=.,.,.;VARIANT_CLASS=SNV,SNV,SNV;SYMBOL_SOURCE=HGNC,HGNC,.;HGNC_ID=HGNC:32037,HGNC:32037,.;CANONICAL=YES,.,.;MANE=NM_001103170.3,.,.;TSL=3,2,.;APPRIS=P1
,.,.;CCDS=CCDS41253.2,.,.;ENSP=ENSP00000352268,.,.;SWISSPROT=Q5VUY0,.,.;TREMBL=.,.,.;UNIPARC=UPI0003EAEC70,.,.;GENE_PHENO=.,.,.;SIFT=.,.,.;PolyPhen=.,.,.;DOMAINS=PIRSF:PIRSF037251&PANTHER:PTHR23024&PANTHER:PTHR23024:SF115&Transmembrane_helices:TMhelix,.,.;miRNA=.,.,.;HGVS_OFFSET=.,.,.;AF=0.1436,0.1436,0.1436;AFR_AF=0.1188,0.1188,0.1188;AMR_AF=0.1052,0.1052,0.1052;EAS_AF=0.2679,0.2679,0.2679;EUR_AF=0.1362,0.1362,0.1362;SAS_AF=0.0838,0.0838,0.0838;AA_AF=.,.,.;EA_AF=.,.,.;gnomAD_AF=.,.,.;gnomAD_AFR_AF=.,.,.;gnomAD_AMR_AF=.,.,.;gnomAD_ASJ_AF=.,.,.;gnomAD_EAS_AF=.,.,.;gnomAD_FIN_AF=.,.,.;gnomAD_NFE_AF=.,.,.;gnomAD_OTH_AF=.,.,.;gnomAD_SAS_AF=.,.,.;MAX_AF=0.2679,0.2679,0.2679;MAX_AF_POPS=EAS,EAS,EAS;CLIN_SIG=.,.,.;SOMATIC=.,.,.;PHENO=.,.,.;PUBMED=.,.,.;MOTIF_NAME=.,.,.;MOTIF_POS=.,.,.;HIGH_INF_POS=.,.,.;MOTIF_SCORE_CHANGE=.,.,.;existing_InFrame_oORFs=.,.,.;existing_OutOfFrame_oORFs=.,.,.;existing_uORFs=.,.,.;five_prime_UTR_variant_annotation=.,.,.;five_prime_UTR_variant_consequence=.,.,.;SpliceRegion=.,.,.;LoF=HC,.,.;LoF_filter=.,.,.;LoF_flags=.,.,.;LoF_info=GERP_DIST:92.57575&BP_DIST:1182&PERCENTILE:0.0318627450980392&DIST_FROM_LAST_EXON:408&50_BP_RULE:PASS&PHYLOCSF_TOO_SHORT,.,.;SpliceAI_pred_DP_AG=.,.,.;SpliceAI_pred_DP_AL=.,.,.;SpliceAI_pred_DP_DG=.,.,.;SpliceAI_pred_DP_DL=.,.,.;SpliceAI_pred_DS_AG=.,.,.;SpliceAI_pred_DS_AL=.,.,.;SpliceAI_pred_DS_DG=.,.,.;SpliceAI_pred_DS_DL=.,.,.;SpliceAI_pred_SYMBOL=.,.,. diff --git a/test/split-vep.19.out b/test/split-vep.19.out index 1875d1045..609e82135 100644 --- a/test/split-vep.19.out +++ b/test/split-vep.19.out @@ -1,4 +1,4 @@ -1 979496 . T C . . 
CSQ=C|missense_variant|MODERATE|PERM1|ENSG00000187642|Transcript|ENST00000341290|protein_coding|3/5||ENST00000341290.6:c.1192A>G|ENSP00000343864.2:p.Ser398Gly|1228|1192|398|S/G|Agc/Ggc|rs13302983||-1||SNV|HGNC|HGNC:28208|||2|A2||ENSP00000343864|Q5SV97||||tolerated(0.2)|benign(0.003)|PANTHER:PTHR47282&MobiDB_lite:mobidb-lite||||0.9418|0.9539|0.999|0.9592|0.9847|||0.9721|0.9533|0.9754|0.9166|0.9999|0.9923|0.9677|0.9536|0.9811|0.9999|gnomAD_EAS|||||||||||||||||||-27|-2|23|25|0.00|0.00|0.00|0.00|PERM1,C|downstream_gene_variant|MODIFIER|PLEKHN1|ENSG00000187583|Transcript|ENST00000379407|protein_coding||||||||||rs13302983|4488|1||SNV|HGNC|HGNC:25284|||1|A2|CCDS53256.1|ENSP00000368717|Q494U1||||||||||0.9418|0.9539|0.999|0.9592|0.9847|||0.9721|0.9533|0.9754|0.9166|0.9999|0.9923|0.9677|0.9536|0.9811|0.9999|gnomAD_EAS|||||||||||||||||||-27|-2|23|25|0.00|0.00|0.00|0.00|PERM1,C|downstream_gene_variant|MODIFIER|PLEKHN1|ENSG00000187583|Transcript|ENST00000379409|protein_coding||||||||||rs13302983|4488|1||SNV|HGNC|HGNC:25284|||2|A2||ENSP00000368719|Q494U1||||||||||0.9418|0.9539|0.999|0.9592|0.9847|||0.9721|0.9533|0.9754|0.9166|0.9999|0.9923|0.9677|0.9536|0.9811|0.9999|gnomAD_EAS|||||||||||||||||||-27|-2|23|25|0.00|0.00|0.00|0.00|PERM1,C|downstream_gene_variant|MODIFIER|PLEKHN1|ENSG00000187583|Transcript|ENST00000379410|protein_coding||||||||||rs13302983|3631|1||SNV|HGNC|HGNC:25284|YES|NM_032129.3|1|P3|CCDS4.1|ENSP00000368720|Q494U1||UPI00001416D8||||||||0.9418|0.9539|0.999|0.9592|0.9847|||0.9721|0.9533|0.9754|0.9166|0.9999|0.9923|0.9677|0.9536|0.9811|0.9999|gnomAD_EAS|||||||||||||||||||-27|-2|23|25|0.00|0.00|0.00|0.00|PERM1,C|missense_variant|MODERATE|PERM1|ENSG00000187642|Transcript|ENST00000433179|protein_coding|1/3||ENST00000433179.3:c.1534A>G|ENSP00000414022.3:p.Ser512Gly|1534|1534|512|S/G|Agc/Ggc|rs13302983||-1||SNV|HGNC|HGNC:28208|YES||5|P2|CCDS76083.1|ENSP00000414022|Q5SV97||||tolerated(0.18)|benign(0.011)|PANTHER:PTHR47282&MobiDB_lite:mobidb-lite||||0.9418|0.9539|0.999|0.9
592|0.9847|||0.9721|0.9533|0.9754|0.9166|0.9999|0.9923|0.9677|0.9536|0.9811|0.9999|gnomAD_EAS|||||||||||||||||||-27|-2|23|25|0.00|0.00|0.00|0.00|PERM1,C|upstream_gene_variant|MODIFIER|PERM1|ENSG00000187642|Transcript|ENST00000479361|retained_intron||||||||||rs13302983|2855|-1||SNV|HGNC|HGNC:28208|||1||||||||||||||0.9418|0.9539|0.999|0.9592|0.9847|||0.9721|0.9533|0.9754|0.9166|0.9999|0.9923|0.9677|0.9536|0.9811|0.9999|gnomAD_EAS|||||||||||||||||||-27|-2|23|25|0.00|0.00|0.00|0.00|PERM1,C|downstream_gene_variant|MODIFIER|PLEKHN1|ENSG00000187583|Transcript|ENST00000491024|protein_coding||||||||||rs13302983|3631|1|cds_start_NF|SNV|HGNC|HGNC:25284|||3|||ENSP00000462558||J3KSM5|UPI000268AE1F||||||||0.9418|0.9539|0.999|0.9592|0.9847|||0.9721|0.9533|0.9754|0.9166|0.9999|0.9923|0.9677|0.9536|0.9811|0.9999|gnomAD_EAS|||||||||||||||||||-27|-2|23|25|0.00|0.00|0.00|0.00|PERM1;Allele=C;Consequence=missense_variant;IMPACT=MODERATE;SYMBOL=PERM1;Gene=ENSG00000187642;Feature_type=Transcript;Feature=ENST00000341290;BIOTYPE=protein_coding;EXON=3/5;INTRON=.;HGVSc=ENST00000341290.6:c.1192A>G;HGVSp=ENSP00000343864.2:p.Ser398Gly;cDNA_position=1228;CDS_position=1192;Protein_position=398;Amino_acids=S/G;Codons=Agc/Ggc;Existing_variation=rs13302983;DISTANCE=.;STRAND=-1;FLAGS=.;VARIANT_CLASS=SNV;SYMBOL_SOURCE=HGNC;HGNC_ID=HGNC:28208;CANONICAL=.;MANE=.;TSL=2;APPRIS=A2;CCDS=.;ENSP=ENSP00000343864;SWISSPROT=Q5SV97;TREMBL=.;UNIPARC=.;GENE_PHENO=.;SIFT=tolerated(0.2);PolyPhen=benign(0.003);DOMAINS=PANTHER:PTHR47282&MobiDB_lite:mobidb-lite;miRNA=.;HGVS_OFFSET=.;AF=.;AFR_AF=0.9418;AMR_AF=0.9539;EAS_AF=0.999;EUR_AF=0.9592;SAS_AF=0.9847;AA_AF=.;EA_AF=.;gnomAD_AF=0.9721;gnomAD_AFR_AF=0.9533;gnomAD_AMR_AF=0.9754;gnomAD_ASJ_AF=0.9166;gnomAD_EAS_AF=0.9999;gnomAD_FIN_AF=0.9923;gnomAD_NFE_AF=0.9677;gnomAD_OTH_AF=0.9536;gnomAD_SAS_AF=0.9811;MAX_AF=0.9999;MAX_AF_POPS=gnomAD_EAS;CLIN_SIG=.;SOMATIC=.;PHENO=.;PUBMED=.;MOTIF_NAME=.;MOTIF_POS=.;HIGH_INF_POS=.;MOTIF_SCORE_CHANGE=.;existing_InFrame_oORFs=.;existing_Ou
tOfFrame_oORFs=.;existing_uORFs=.;five_prime_UTR_variant_annotation=.;five_prime_UTR_variant_consequence=.;SpliceRegion=.;LoF=.;LoF_filter=.;LoF_flags=.;LoF_info=.;SpliceAI_pred_DP_AG=-27;SpliceAI_pred_DP_AL=-2;SpliceAI_pred_DP_DG=23;SpliceAI_pred_DP_DL=25;SpliceAI_pred_DS_AG=0.00;SpliceAI_pred_DS_AL=0.00;SpliceAI_pred_DS_DG=0.00;SpliceAI_pred_DS_DL=0.00;SpliceAI_pred_SYMBOL=PERM1 -1 979560 . T C . . CSQ=C|synonymous_variant|LOW|PERM1|ENSG00000187642|Transcript|ENST00000341290|protein_coding|3/5||ENST00000341290.6:c.1128A>G|ENSP00000343864.2:p.Ala376%3D|1164|1128|376|A|gcA/gcG|rs13303033||-1||SNV|HGNC|HGNC:28208|||2|A2||ENSP00000343864|Q5SV97||||||PANTHER:PTHR47282&Low_complexity_(Seg):seg|||0.5096|0.177|0.6095|0.7123|0.5895|0.5971|||0.5887|0.2428|0.6604|0.5617|0.7237|0.5334|0.5801|0.5647|0.6213|0.7237|gnomAD_EAS|||||||||||||||||||28|-30|-39|3|0.00|0.00|0.00|0.00|PERM1,C|downstream_gene_variant|MODIFIER|PLEKHN1|ENSG00000187583|Transcript|ENST00000379407|protein_coding||||||||||rs13303033|4552|1||SNV|HGNC|HGNC:25284|||1|A2|CCDS53256.1|ENSP00000368717|Q494U1|||||||||0.5096|0.177|0.6095|0.7123|0.5895|0.5971|||0.5887|0.2428|0.6604|0.5617|0.7237|0.5334|0.5801|0.5647|0.6213|0.7237|gnomAD_EAS|||||||||||||||||||28|-30|-39|3|0.00|0.00|0.00|0.00|PERM1,C|downstream_gene_variant|MODIFIER|PLEKHN1|ENSG00000187583|Transcript|ENST00000379409|protein_coding||||||||||rs13303033|4552|1||SNV|HGNC|HGNC:25284|||2|A2||ENSP00000368719|Q494U1|||||||||0.5096|0.177|0.6095|0.7123|0.5895|0.5971|||0.5887|0.2428|0.6604|0.5617|0.7237|0.5334|0.5801|0.5647|0.6213|0.7237|gnomAD_EAS|||||||||||||||||||28|-30|-39|3|0.00|0.00|0.00|0.00|PERM1,C|downstream_gene_variant|MODIFIER|PLEKHN1|ENSG00000187583|Transcript|ENST00000379410|protein_coding||||||||||rs13303033|3695|1||SNV|HGNC|HGNC:25284|YES|NM_032129.3|1|P3|CCDS4.1|ENSP00000368720|Q494U1||UPI00001416D8|||||||0.5096|0.177|0.6095|0.7123|0.5895|0.5971|||0.5887|0.2428|0.6604|0.5617|0.7237|0.5334|0.5801|0.5647|0.6213|0.7237|gnomAD_EAS|||||||||||||||||||28|-3
0|-39|3|0.00|0.00|0.00|0.00|PERM1,C|synonymous_variant|LOW|PERM1|ENSG00000187642|Transcript|ENST00000433179|protein_coding|1/3||ENST00000433179.3:c.1470A>G|ENSP00000414022.3:p.Ala490%3D|1470|1470|490|A|gcA/gcG|rs13303033||-1||SNV|HGNC|HGNC:28208|YES||5|P2|CCDS76083.1|ENSP00000414022|Q5SV97||||||PANTHER:PTHR47282&Low_complexity_(Seg):seg|||0.5096|0.177|0.6095|0.7123|0.5895|0.5971|||0.5887|0.2428|0.6604|0.5617|0.7237|0.5334|0.5801|0.5647|0.6213|0.7237|gnomAD_EAS|||||||||||||||||||28|-30|-39|3|0.00|0.00|0.00|0.00|PERM1,C|upstream_gene_variant|MODIFIER|PERM1|ENSG00000187642|Transcript|ENST00000479361|retained_intron||||||||||rs13303033|2919|-1||SNV|HGNC|HGNC:28208|||1|||||||||||||0.5096|0.177|0.6095|0.7123|0.5895|0.5971|||0.5887|0.2428|0.6604|0.5617|0.7237|0.5334|0.5801|0.5647|0.6213|0.7237|gnomAD_EAS|||||||||||||||||||28|-30|-39|3|0.00|0.00|0.00|0.00|PERM1,C|downstream_gene_variant|MODIFIER|PLEKHN1|ENSG00000187583|Transcript|ENST00000491024|protein_coding||||||||||rs13303033|3695|1|cds_start_NF|SNV|HGNC|HGNC:25284|||3|||ENSP00000462558||J3KSM5|UPI000268AE1F|||||||0.5096|0.177|0.6095|0.7123|0.5895|0.5971|||0.5887|0.2428|0.6604|0.5617|0.7237|0.5334|0.5801|0.5647|0.6213|0.7237|gnomAD_EAS|||||||||||||||||||28|-30|-39|3|0.00|0.00|0.00|0.00|PERM1;Allele=C;Consequence=synonymous_variant;IMPACT=LOW;SYMBOL=PERM1;Gene=ENSG00000187642;Feature_type=Transcript;Feature=ENST00000341290;BIOTYPE=protein_coding;EXON=3/5;INTRON=.;HGVSc=ENST00000341290.6:c.1128A>G;HGVSp=ENSP00000343864.2:p.Ala376%3D;cDNA_position=1164;CDS_position=1128;Protein_position=376;Amino_acids=A;Codons=gcA/gcG;Existing_variation=rs13303033;DISTANCE=.;STRAND=-1;FLAGS=.;VARIANT_CLASS=SNV;SYMBOL_SOURCE=HGNC;HGNC_ID=HGNC:28208;CANONICAL=.;MANE=.;TSL=2;APPRIS=A2;CCDS=.;ENSP=ENSP00000343864;SWISSPROT=Q5SV97;TREMBL=.;UNIPARC=.;GENE_PHENO=.;SIFT=.;PolyPhen=.;DOMAINS=PANTHER:PTHR47282&Low_complexity_(Seg):seg;miRNA=.;HGVS_OFFSET=.;AF=0.5096;AFR_AF=0.177;AMR_AF=0.6095;EAS_AF=0.7123;EUR_AF=0.5895;SAS_AF=0.5971;AA_AF=.;EA_AF=
.;gnomAD_AF=0.5887;gnomAD_AFR_AF=0.2428;gnomAD_AMR_AF=0.6604;gnomAD_ASJ_AF=0.5617;gnomAD_EAS_AF=0.7237;gnomAD_FIN_AF=0.5334;gnomAD_NFE_AF=0.5801;gnomAD_OTH_AF=0.5647;gnomAD_SAS_AF=0.6213;MAX_AF=0.7237;MAX_AF_POPS=gnomAD_EAS;CLIN_SIG=.;SOMATIC=.;PHENO=.;PUBMED=.;MOTIF_NAME=.;MOTIF_POS=.;HIGH_INF_POS=.;MOTIF_SCORE_CHANGE=.;existing_InFrame_oORFs=.;existing_OutOfFrame_oORFs=.;existing_uORFs=.;five_prime_UTR_variant_annotation=.;five_prime_UTR_variant_consequence=.;SpliceRegion=.;LoF=.;LoF_filter=.;LoF_flags=.;LoF_info=.;SpliceAI_pred_DP_AG=28;SpliceAI_pred_DP_AL=-30;SpliceAI_pred_DP_DG=-39;SpliceAI_pred_DP_DL=3;SpliceAI_pred_DS_AG=0.00;SpliceAI_pred_DS_AL=0.00;SpliceAI_pred_DS_DG=0.00;SpliceAI_pred_DS_DL=0.00;SpliceAI_pred_SYMBOL=PERM1 -1 999842 . C A . . CSQ=A|intron_variant|MODIFIER|HES4|ENSG00000188290|Transcript|ENST00000304952|protein_coding||1/3|ENST00000304952.11:c.108+24G>T|||||||rs2298214||-1||SNV|HGNC|HGNC:24149||NM_021170.4|1|P1|CCDS5.1|ENSP00000304595|Q9HCC6|||||||||0.4938|0.0356|0.6268|0.7718|0.5895|0.6339|0.1386|0.5695|0.6136|0.1208|0.7139|0.6344|0.786|0.5555|0.595|0.5882|0.6625|0.786|gnomAD_EAS|||||||||||||||||||6|-37|6|24|0.00|0.00|0.01|0.02|HES4,A|missense_variant|MODERATE|HES4|ENSG00000188290|Transcript|ENST00000428771|protein_coding|1/3||ENST00000428771.6:c.132G>T|ENSP00000393198.2:p.Arg44Ser|331|132|44|R/S|agG/agT|rs2298214||-1||SNV|HGNC|HGNC:24149|YES||2||CCDS44034.1|ENSP00000393198||E9PB28|UPI0001881B51||tolerated_low_confidence(0.72)|benign(0)|MobiDB_lite:mobidb-lite&Low_complexity_(Seg):seg|||0.4938|0.0356|0.6268|0.7718|0.5895|0.6339|0.1386|0.5695|0.6136|0.1208|0.7139|0.6344|0.786|0.5555|0.595|0.5882|0.6625|0.786|gnomAD_EAS||||||||||||||||||MUTANT_DONOR_MES:11.0784944978449&EXON_END:1000172&INTRON_END:999691&DE_NOVO_DONOR_MES_POS:-156&DE_NOVO_DONOR_POS:-156&EXON_START:999692&DE_NOVO_DONOR_MES:7.0006176073036&DE_NOVO_DONOR_PROB:0.167156964545091&INTRON_START:999614|6|-37|6|24|0.00|0.00|0.01|0.02|HES4,A|non_coding_transcript_exon_variant|MODIFIER|
HES4|ENSG00000188290|Transcript|ENST00000481869|retained_intron|1/2||ENST00000481869.1:n.255G>T||255|||||rs2298214||-1||SNV|HGNC|HGNC:24149|||2|||||||||||||0.4938|0.0356|0.6268|0.7718|0.5895|0.6339|0.1386|0.5695|0.6136|0.1208|0.7139|0.6344|0.786|0.5555|0.595|0.5882|0.6625|0.786|gnomAD_EAS|||||||||||||||||||6|-37|6|24|0.00|0.00|0.01|0.02|HES4,A|intron_variant|MODIFIER|HES4|ENSG00000188290|Transcript|ENST00000484667|protein_coding||1/2|ENST00000484667.2:c.108+24G>T|||||||rs2298214||-1||SNV|HGNC|HGNC:24149|||3|||ENSP00000425085||D6REB3|UPI0001D3BBEE|||||||0.4938|0.0356|0.6268|0.7718|0.5895|0.6339|0.1386|0.5695|0.6136|0.1208|0.7139|0.6344|0.786|0.5555|0.595|0.5882|0.6625|0.786|gnomAD_EAS|||||||||||||||||||6|-37|6|24|0.00|0.00|0.01|0.02|HES4,A|upstream_gene_variant|MODIFIER|AL645608.7|ENSG00000272512|Transcript|ENST00000606034|lncRNA||||||||||rs2298214|1791|-1||SNV|Clone_based_ensembl_gene||YES|||||||||||||||0.4938|0.0356|0.6268|0.7718|0.5895|0.6339|0.1386|0.5695|0.6136|0.1208|0.7139|0.6344|0.786|0.5555|0.595|0.5882|0.6625|0.786|gnomAD_EAS|||||||||||||||||||6|-37|6|24|0.00|0.00|0.01|0.02|HES4,A|upstream_gene_variant|MODIFIER|ISG15|ENSG00000187608|Transcript|ENST00000624652|protein_coding||||||||||rs2298214|1303|1|cds_end_NF|SNV|HGNC|HGNC:4053|||3|||ENSP00000485313||A0A096LNZ9||1||||||0.4938|0.0356|0.6268|0.7718|0.5895|0.6339|0.1386|0.5695|0.6136|0.1208|0.7139|0.6344|0.786|0.5555|0.595|0.5882|0.6625|0.786|gnomAD_EAS|||||||||||||||||||6|-37|6|24|0.00|0.00|0.01|0.02|HES4,A|upstream_gene_variant|MODIFIER|ISG15|ENSG00000187608|Transcript|ENST00000624697|protein_coding||||||||||rs2298214|1296|1||SNV|HGNC|HGNC:4053|||3|A2||ENSP00000485643||A0A096LPJ4||1||||||0.4938|0.0356|0.6268|0.7718|0.5895|0.6339|0.1386|0.5695|0.6136|0.1208|0.7139|0.6344|0.786|0.5555|0.595|0.5882|0.6625|0.786|gnomAD_EAS|||||||||||||||||||6|-37|6|24|0.00|0.00|0.01|0.02|HES4,A|regulatory_region_variant|MODIFIER|||RegulatoryFeature|ENSR00000000118|promoter||||||||||rs2298214||||SNV||||||||||||||||||0.4938|0.035
6|0.6268|0.7718|0.5895|0.6339|0.1386|0.5695|0.6136|0.1208|0.7139|0.6344|0.786|0.5555|0.595|0.5882|0.6625|0.786|gnomAD_EAS|||||||||||||||||||||||||||;Allele=A;Consequence=missense_variant;IMPACT=MODERATE;SYMBOL=HES4;Gene=ENSG00000188290;Feature_type=Transcript;Feature=ENST00000428771;BIOTYPE=protein_coding;EXON=1/3;INTRON=.;HGVSc=ENST00000428771.6:c.132G>T;HGVSp=ENSP00000393198.2:p.Arg44Ser;cDNA_position=331;CDS_position=132;Protein_position=44;Amino_acids=R/S;Codons=agG/agT;Existing_variation=rs2298214;DISTANCE=.;STRAND=-1;FLAGS=.;VARIANT_CLASS=SNV;SYMBOL_SOURCE=HGNC;HGNC_ID=HGNC:24149;CANONICAL=YES;MANE=.;TSL=2;APPRIS=.;CCDS=CCDS44034.1;ENSP=ENSP00000393198;SWISSPROT=.;TREMBL=E9PB28;UNIPARC=UPI0001881B51;GENE_PHENO=.;SIFT=tolerated_low_confidence(0.72);PolyPhen=benign(0);DOMAINS=MobiDB_lite:mobidb-lite&Low_complexity_(Seg):seg;miRNA=.;HGVS_OFFSET=.;AF=0.4938;AFR_AF=0.0356;AMR_AF=0.6268;EAS_AF=0.7718;EUR_AF=0.5895;SAS_AF=0.6339;AA_AF=0.1386;EA_AF=0.5695;gnomAD_AF=0.6136;gnomAD_AFR_AF=0.1208;gnomAD_AMR_AF=0.7139;gnomAD_ASJ_AF=0.6344;gnomAD_EAS_AF=0.786;gnomAD_FIN_AF=0.5555;gnomAD_NFE_AF=0.595;gnomAD_OTH_AF=0.5882;gnomAD_SAS_AF=0.6625;MAX_AF=0.786;MAX_AF_POPS=gnomAD_EAS;CLIN_SIG=.;SOMATIC=.;PHENO=.;PUBMED=.;MOTIF_NAME=.;MOTIF_POS=.;HIGH_INF_POS=.;MOTIF_SCORE_CHANGE=.;existing_InFrame_oORFs=.;existing_OutOfFrame_oORFs=.;existing_uORFs=.;five_prime_UTR_variant_annotation=.;five_prime_UTR_variant_consequence=.;SpliceRegion=.;LoF=.;LoF_filter=.;LoF_flags=.;LoF_info=MUTANT_DONOR_MES:11.0784944978449&EXON_END:1000172&INTRON_END:999691&DE_NOVO_DONOR_MES_POS:-156&DE_NOVO_DONOR_POS:-156&EXON_START:999692&DE_NOVO_DONOR_MES:7.0006176073036&DE_NOVO_DONOR_PROB:0.167156964545091&INTRON_START:999614;SpliceAI_pred_DP_AG=6;SpliceAI_pred_DP_AL=-37;SpliceAI_pred_DP_DG=6;SpliceAI_pred_DP_DL=24;SpliceAI_pred_DS_AG=0.00;SpliceAI_pred_DS_AL=0.00;SpliceAI_pred_DS_DG=0.01;SpliceAI_pred_DS_DL=0.02;SpliceAI_pred_SYMBOL=HES4 +1 979496 . T C . . 
CSQ=C|missense_variant|MODERATE|PERM1|ENSG00000187642|Transcript|ENST00000341290|protein_coding|3/5||ENST00000341290.6:c.1192A>G|ENSP00000343864.2:p.Ser398Gly|1228|1192|398|S/G|Agc/Ggc|rs13302983||-1||SNV|HGNC|HGNC:28208|||2|A2||ENSP00000343864|Q5SV97||||tolerated(0.2)|benign(0.003)|PANTHER:PTHR47282&MobiDB_lite:mobidb-lite||||0.9418|0.9539|0.999|0.9592|0.9847|||0.9721|0.9533|0.9754|0.9166|0.9999|0.9923|0.9677|0.9536|0.9811|0.9999|gnomAD_EAS|||||||||||||||||||-27|-2|23|25|0.00|0.00|0.00|0.00|PERM1,C|downstream_gene_variant|MODIFIER|PLEKHN1|ENSG00000187583|Transcript|ENST00000379407|protein_coding||||||||||rs13302983|4488|1||SNV|HGNC|HGNC:25284|||1|A2|CCDS53256.1|ENSP00000368717|Q494U1||||||||||0.9418|0.9539|0.999|0.9592|0.9847|||0.9721|0.9533|0.9754|0.9166|0.9999|0.9923|0.9677|0.9536|0.9811|0.9999|gnomAD_EAS|||||||||||||||||||-27|-2|23|25|0.00|0.00|0.00|0.00|PERM1,C|downstream_gene_variant|MODIFIER|PLEKHN1|ENSG00000187583|Transcript|ENST00000379409|protein_coding||||||||||rs13302983|4488|1||SNV|HGNC|HGNC:25284|||2|A2||ENSP00000368719|Q494U1||||||||||0.9418|0.9539|0.999|0.9592|0.9847|||0.9721|0.9533|0.9754|0.9166|0.9999|0.9923|0.9677|0.9536|0.9811|0.9999|gnomAD_EAS|||||||||||||||||||-27|-2|23|25|0.00|0.00|0.00|0.00|PERM1,C|downstream_gene_variant|MODIFIER|PLEKHN1|ENSG00000187583|Transcript|ENST00000379410|protein_coding||||||||||rs13302983|3631|1||SNV|HGNC|HGNC:25284|YES|NM_032129.3|1|P3|CCDS4.1|ENSP00000368720|Q494U1||UPI00001416D8||||||||0.9418|0.9539|0.999|0.9592|0.9847|||0.9721|0.9533|0.9754|0.9166|0.9999|0.9923|0.9677|0.9536|0.9811|0.9999|gnomAD_EAS|||||||||||||||||||-27|-2|23|25|0.00|0.00|0.00|0.00|PERM1,C|missense_variant|MODERATE|PERM1|ENSG00000187642|Transcript|ENST00000433179|protein_coding|1/3||ENST00000433179.3:c.1534A>G|ENSP00000414022.3:p.Ser512Gly|1534|1534|512|S/G|Agc/Ggc|rs13302983||-1||SNV|HGNC|HGNC:28208|YES||5|P2|CCDS76083.1|ENSP00000414022|Q5SV97||||tolerated(0.18)|benign(0.011)|PANTHER:PTHR47282&MobiDB_lite:mobidb-lite||||0.9418|0.9539|0.999|0.9
592|0.9847|||0.9721|0.9533|0.9754|0.9166|0.9999|0.9923|0.9677|0.9536|0.9811|0.9999|gnomAD_EAS|||||||||||||||||||-27|-2|23|25|0.00|0.00|0.00|0.00|PERM1,C|upstream_gene_variant|MODIFIER|PERM1|ENSG00000187642|Transcript|ENST00000479361|retained_intron||||||||||rs13302983|2855|-1||SNV|HGNC|HGNC:28208|||1||||||||||||||0.9418|0.9539|0.999|0.9592|0.9847|||0.9721|0.9533|0.9754|0.9166|0.9999|0.9923|0.9677|0.9536|0.9811|0.9999|gnomAD_EAS|||||||||||||||||||-27|-2|23|25|0.00|0.00|0.00|0.00|PERM1,C|downstream_gene_variant|MODIFIER|PLEKHN1|ENSG00000187583|Transcript|ENST00000491024|protein_coding||||||||||rs13302983|3631|1|cds_start_NF|SNV|HGNC|HGNC:25284|||3|||ENSP00000462558||J3KSM5|UPI000268AE1F||||||||0.9418|0.9539|0.999|0.9592|0.9847|||0.9721|0.9533|0.9754|0.9166|0.9999|0.9923|0.9677|0.9536|0.9811|0.9999|gnomAD_EAS|||||||||||||||||||-27|-2|23|25|0.00|0.00|0.00|0.00|PERM1;Allele=C;Consequence=missense_variant;IMPACT=MODERATE;SYMBOL=PERM1;Gene=ENSG00000187642;Feature_type=Transcript;Feature=ENST00000341290;BIOTYPE=protein_coding;EXON=3/5;INTRON=.;HGVSc=ENST00000341290.6:c.1192A>G;HGVSp=ENSP00000343864.2:p.Ser398Gly;cDNA_position=1228;CDS_position=1192;Protein_position=398;Amino_acids=S/G;Codons=Agc/Ggc;Existing_variation=rs13302983;DISTANCE=.;STRAND=-1;FLAGS=.;VARIANT_CLASS=SNV;SYMBOL_SOURCE=HGNC;HGNC_ID=HGNC:28208;CANONICAL=.;MANE=.;TSL=2;APPRIS=A2;CCDS=.;ENSP=ENSP00000343864;SWISSPROT=Q5SV97;TREMBL=.;UNIPARC=.;GENE_PHENO=.;SIFT=tolerated(0.2);PolyPhen=benign(0.003);DOMAINS=PANTHER:PTHR47282&MobiDB_lite:mobidb-lite;miRNA=.;HGVS_OFFSET=.;AF=.;AFR_AF=0.9418;AMR_AF=0.9539;EAS_AF=0.999;EUR_AF=0.9592;SAS_AF=0.9847;AA_AF=.;EA_AF=.;gnomAD_AF=0.9721;gnomAD_AFR_AF=0.9533;gnomAD_AMR_AF=0.9754;gnomAD_ASJ_AF=0.9166;gnomAD_EAS_AF=0.9999;gnomAD_FIN_AF=0.9923;gnomAD_NFE_AF=0.9677;gnomAD_OTH_AF=0.9536;gnomAD_SAS_AF=0.9811;MAX_AF=0.9999;MAX_AF_POPS=gnomAD_EAS;CLIN_SIG=.;SOMATIC=.;PHENO=.;PUBMED=.;MOTIF_NAME=.;MOTIF_POS=.;HIGH_INF_POS=.;MOTIF_SCORE_CHANGE=.;existing_InFrame_oORFs=.;existing_Ou
tOfFrame_oORFs=.;existing_uORFs=.;five_prime_UTR_variant_annotation=.;five_prime_UTR_variant_consequence=.;SpliceRegion=.;LoF=.;LoF_filter=.;LoF_flags=.;LoF_info=.;SpliceAI_pred_DP_AG=-27;SpliceAI_pred_DP_AL=-2;SpliceAI_pred_DP_DG=23;SpliceAI_pred_DP_DL=25;SpliceAI_pred_DS_AG=0;SpliceAI_pred_DS_AL=0;SpliceAI_pred_DS_DG=0;SpliceAI_pred_DS_DL=0;SpliceAI_pred_SYMBOL=PERM1 +1 979560 . T C . . CSQ=C|synonymous_variant|LOW|PERM1|ENSG00000187642|Transcript|ENST00000341290|protein_coding|3/5||ENST00000341290.6:c.1128A>G|ENSP00000343864.2:p.Ala376%3D|1164|1128|376|A|gcA/gcG|rs13303033||-1||SNV|HGNC|HGNC:28208|||2|A2||ENSP00000343864|Q5SV97||||||PANTHER:PTHR47282&Low_complexity_(Seg):seg|||0.5096|0.177|0.6095|0.7123|0.5895|0.5971|||0.5887|0.2428|0.6604|0.5617|0.7237|0.5334|0.5801|0.5647|0.6213|0.7237|gnomAD_EAS|||||||||||||||||||28|-30|-39|3|0.00|0.00|0.00|0.00|PERM1,C|downstream_gene_variant|MODIFIER|PLEKHN1|ENSG00000187583|Transcript|ENST00000379407|protein_coding||||||||||rs13303033|4552|1||SNV|HGNC|HGNC:25284|||1|A2|CCDS53256.1|ENSP00000368717|Q494U1|||||||||0.5096|0.177|0.6095|0.7123|0.5895|0.5971|||0.5887|0.2428|0.6604|0.5617|0.7237|0.5334|0.5801|0.5647|0.6213|0.7237|gnomAD_EAS|||||||||||||||||||28|-30|-39|3|0.00|0.00|0.00|0.00|PERM1,C|downstream_gene_variant|MODIFIER|PLEKHN1|ENSG00000187583|Transcript|ENST00000379409|protein_coding||||||||||rs13303033|4552|1||SNV|HGNC|HGNC:25284|||2|A2||ENSP00000368719|Q494U1|||||||||0.5096|0.177|0.6095|0.7123|0.5895|0.5971|||0.5887|0.2428|0.6604|0.5617|0.7237|0.5334|0.5801|0.5647|0.6213|0.7237|gnomAD_EAS|||||||||||||||||||28|-30|-39|3|0.00|0.00|0.00|0.00|PERM1,C|downstream_gene_variant|MODIFIER|PLEKHN1|ENSG00000187583|Transcript|ENST00000379410|protein_coding||||||||||rs13303033|3695|1||SNV|HGNC|HGNC:25284|YES|NM_032129.3|1|P3|CCDS4.1|ENSP00000368720|Q494U1||UPI00001416D8|||||||0.5096|0.177|0.6095|0.7123|0.5895|0.5971|||0.5887|0.2428|0.6604|0.5617|0.7237|0.5334|0.5801|0.5647|0.6213|0.7237|gnomAD_EAS|||||||||||||||||||28|-30|-39|3|0.00
|0.00|0.00|0.00|PERM1,C|synonymous_variant|LOW|PERM1|ENSG00000187642|Transcript|ENST00000433179|protein_coding|1/3||ENST00000433179.3:c.1470A>G|ENSP00000414022.3:p.Ala490%3D|1470|1470|490|A|gcA/gcG|rs13303033||-1||SNV|HGNC|HGNC:28208|YES||5|P2|CCDS76083.1|ENSP00000414022|Q5SV97||||||PANTHER:PTHR47282&Low_complexity_(Seg):seg|||0.5096|0.177|0.6095|0.7123|0.5895|0.5971|||0.5887|0.2428|0.6604|0.5617|0.7237|0.5334|0.5801|0.5647|0.6213|0.7237|gnomAD_EAS|||||||||||||||||||28|-30|-39|3|0.00|0.00|0.00|0.00|PERM1,C|upstream_gene_variant|MODIFIER|PERM1|ENSG00000187642|Transcript|ENST00000479361|retained_intron||||||||||rs13303033|2919|-1||SNV|HGNC|HGNC:28208|||1|||||||||||||0.5096|0.177|0.6095|0.7123|0.5895|0.5971|||0.5887|0.2428|0.6604|0.5617|0.7237|0.5334|0.5801|0.5647|0.6213|0.7237|gnomAD_EAS|||||||||||||||||||28|-30|-39|3|0.00|0.00|0.00|0.00|PERM1,C|downstream_gene_variant|MODIFIER|PLEKHN1|ENSG00000187583|Transcript|ENST00000491024|protein_coding||||||||||rs13303033|3695|1|cds_start_NF|SNV|HGNC|HGNC:25284|||3|||ENSP00000462558||J3KSM5|UPI000268AE1F|||||||0.5096|0.177|0.6095|0.7123|0.5895|0.5971|||0.5887|0.2428|0.6604|0.5617|0.7237|0.5334|0.5801|0.5647|0.6213|0.7237|gnomAD_EAS|||||||||||||||||||28|-30|-39|3|0.00|0.00|0.00|0.00|PERM1;Allele=C;Consequence=synonymous_variant;IMPACT=LOW;SYMBOL=PERM1;Gene=ENSG00000187642;Feature_type=Transcript;Feature=ENST00000341290;BIOTYPE=protein_coding;EXON=3/5;INTRON=.;HGVSc=ENST00000341290.6:c.1128A>G;HGVSp=ENSP00000343864.2:p.Ala376%3D;cDNA_position=1164;CDS_position=1128;Protein_position=376;Amino_acids=A;Codons=gcA/gcG;Existing_variation=rs13303033;DISTANCE=.;STRAND=-1;FLAGS=.;VARIANT_CLASS=SNV;SYMBOL_SOURCE=HGNC;HGNC_ID=HGNC:28208;CANONICAL=.;MANE=.;TSL=2;APPRIS=A2;CCDS=.;ENSP=ENSP00000343864;SWISSPROT=Q5SV97;TREMBL=.;UNIPARC=.;GENE_PHENO=.;SIFT=.;PolyPhen=.;DOMAINS=PANTHER:PTHR47282&Low_complexity_(Seg):seg;miRNA=.;HGVS_OFFSET=.;AF=0.5096;AFR_AF=0.177;AMR_AF=0.6095;EAS_AF=0.7123;EUR_AF=0.5895;SAS_AF=0.5971;AA_AF=.;EA_AF=.;gnomAD_AF=
0.5887;gnomAD_AFR_AF=0.2428;gnomAD_AMR_AF=0.6604;gnomAD_ASJ_AF=0.5617;gnomAD_EAS_AF=0.7237;gnomAD_FIN_AF=0.5334;gnomAD_NFE_AF=0.5801;gnomAD_OTH_AF=0.5647;gnomAD_SAS_AF=0.6213;MAX_AF=0.7237;MAX_AF_POPS=gnomAD_EAS;CLIN_SIG=.;SOMATIC=.;PHENO=.;PUBMED=.;MOTIF_NAME=.;MOTIF_POS=.;HIGH_INF_POS=.;MOTIF_SCORE_CHANGE=.;existing_InFrame_oORFs=.;existing_OutOfFrame_oORFs=.;existing_uORFs=.;five_prime_UTR_variant_annotation=.;five_prime_UTR_variant_consequence=.;SpliceRegion=.;LoF=.;LoF_filter=.;LoF_flags=.;LoF_info=.;SpliceAI_pred_DP_AG=28;SpliceAI_pred_DP_AL=-30;SpliceAI_pred_DP_DG=-39;SpliceAI_pred_DP_DL=3;SpliceAI_pred_DS_AG=0;SpliceAI_pred_DS_AL=0;SpliceAI_pred_DS_DG=0;SpliceAI_pred_DS_DL=0;SpliceAI_pred_SYMBOL=PERM1 +1 999842 . C A . . CSQ=A|intron_variant|MODIFIER|HES4|ENSG00000188290|Transcript|ENST00000304952|protein_coding||1/3|ENST00000304952.11:c.108+24G>T|||||||rs2298214||-1||SNV|HGNC|HGNC:24149||NM_021170.4|1|P1|CCDS5.1|ENSP00000304595|Q9HCC6|||||||||0.4938|0.0356|0.6268|0.7718|0.5895|0.6339|0.1386|0.5695|0.6136|0.1208|0.7139|0.6344|0.786|0.5555|0.595|0.5882|0.6625|0.786|gnomAD_EAS|||||||||||||||||||6|-37|6|24|0.00|0.00|0.01|0.02|HES4,A|missense_variant|MODERATE|HES4|ENSG00000188290|Transcript|ENST00000428771|protein_coding|1/3||ENST00000428771.6:c.132G>T|ENSP00000393198.2:p.Arg44Ser|331|132|44|R/S|agG/agT|rs2298214||-1||SNV|HGNC|HGNC:24149|YES||2||CCDS44034.1|ENSP00000393198||E9PB28|UPI0001881B51||tolerated_low_confidence(0.72)|benign(0)|MobiDB_lite:mobidb-lite&Low_complexity_(Seg):seg|||0.4938|0.0356|0.6268|0.7718|0.5895|0.6339|0.1386|0.5695|0.6136|0.1208|0.7139|0.6344|0.786|0.5555|0.595|0.5882|0.6625|0.786|gnomAD_EAS||||||||||||||||||MUTANT_DONOR_MES:11.0784944978449&EXON_END:1000172&INTRON_END:999691&DE_NOVO_DONOR_MES_POS:-156&DE_NOVO_DONOR_POS:-156&EXON_START:999692&DE_NOVO_DONOR_MES:7.0006176073036&DE_NOVO_DONOR_PROB:0.167156964545091&INTRON_START:999614|6|-37|6|24|0.00|0.00|0.01|0.02|HES4,A|non_coding_transcript_exon_variant|MODIFIER|HES4|ENSG00000188290|Tra
nscript|ENST00000481869|retained_intron|1/2||ENST00000481869.1:n.255G>T||255|||||rs2298214||-1||SNV|HGNC|HGNC:24149|||2|||||||||||||0.4938|0.0356|0.6268|0.7718|0.5895|0.6339|0.1386|0.5695|0.6136|0.1208|0.7139|0.6344|0.786|0.5555|0.595|0.5882|0.6625|0.786|gnomAD_EAS|||||||||||||||||||6|-37|6|24|0.00|0.00|0.01|0.02|HES4,A|intron_variant|MODIFIER|HES4|ENSG00000188290|Transcript|ENST00000484667|protein_coding||1/2|ENST00000484667.2:c.108+24G>T|||||||rs2298214||-1||SNV|HGNC|HGNC:24149|||3|||ENSP00000425085||D6REB3|UPI0001D3BBEE|||||||0.4938|0.0356|0.6268|0.7718|0.5895|0.6339|0.1386|0.5695|0.6136|0.1208|0.7139|0.6344|0.786|0.5555|0.595|0.5882|0.6625|0.786|gnomAD_EAS|||||||||||||||||||6|-37|6|24|0.00|0.00|0.01|0.02|HES4,A|upstream_gene_variant|MODIFIER|AL645608.7|ENSG00000272512|Transcript|ENST00000606034|lncRNA||||||||||rs2298214|1791|-1||SNV|Clone_based_ensembl_gene||YES|||||||||||||||0.4938|0.0356|0.6268|0.7718|0.5895|0.6339|0.1386|0.5695|0.6136|0.1208|0.7139|0.6344|0.786|0.5555|0.595|0.5882|0.6625|0.786|gnomAD_EAS|||||||||||||||||||6|-37|6|24|0.00|0.00|0.01|0.02|HES4,A|upstream_gene_variant|MODIFIER|ISG15|ENSG00000187608|Transcript|ENST00000624652|protein_coding||||||||||rs2298214|1303|1|cds_end_NF|SNV|HGNC|HGNC:4053|||3|||ENSP00000485313||A0A096LNZ9||1||||||0.4938|0.0356|0.6268|0.7718|0.5895|0.6339|0.1386|0.5695|0.6136|0.1208|0.7139|0.6344|0.786|0.5555|0.595|0.5882|0.6625|0.786|gnomAD_EAS|||||||||||||||||||6|-37|6|24|0.00|0.00|0.01|0.02|HES4,A|upstream_gene_variant|MODIFIER|ISG15|ENSG00000187608|Transcript|ENST00000624697|protein_coding||||||||||rs2298214|1296|1||SNV|HGNC|HGNC:4053|||3|A2||ENSP00000485643||A0A096LPJ4||1||||||0.4938|0.0356|0.6268|0.7718|0.5895|0.6339|0.1386|0.5695|0.6136|0.1208|0.7139|0.6344|0.786|0.5555|0.595|0.5882|0.6625|0.786|gnomAD_EAS|||||||||||||||||||6|-37|6|24|0.00|0.00|0.01|0.02|HES4,A|regulatory_region_variant|MODIFIER|||RegulatoryFeature|ENSR00000000118|promoter||||||||||rs2298214||||SNV||||||||||||||||||0.4938|0.0356|0.6268|0.7718|0.5895|0
.6339|0.1386|0.5695|0.6136|0.1208|0.7139|0.6344|0.786|0.5555|0.595|0.5882|0.6625|0.786|gnomAD_EAS|||||||||||||||||||||||||||;Allele=A;Consequence=missense_variant;IMPACT=MODERATE;SYMBOL=HES4;Gene=ENSG00000188290;Feature_type=Transcript;Feature=ENST00000428771;BIOTYPE=protein_coding;EXON=1/3;INTRON=.;HGVSc=ENST00000428771.6:c.132G>T;HGVSp=ENSP00000393198.2:p.Arg44Ser;cDNA_position=331;CDS_position=132;Protein_position=44;Amino_acids=R/S;Codons=agG/agT;Existing_variation=rs2298214;DISTANCE=.;STRAND=-1;FLAGS=.;VARIANT_CLASS=SNV;SYMBOL_SOURCE=HGNC;HGNC_ID=HGNC:24149;CANONICAL=YES;MANE=.;TSL=2;APPRIS=.;CCDS=CCDS44034.1;ENSP=ENSP00000393198;SWISSPROT=.;TREMBL=E9PB28;UNIPARC=UPI0001881B51;GENE_PHENO=.;SIFT=tolerated_low_confidence(0.72);PolyPhen=benign(0);DOMAINS=MobiDB_lite:mobidb-lite&Low_complexity_(Seg):seg;miRNA=.;HGVS_OFFSET=.;AF=0.4938;AFR_AF=0.0356;AMR_AF=0.6268;EAS_AF=0.7718;EUR_AF=0.5895;SAS_AF=0.6339;AA_AF=0.1386;EA_AF=0.5695;gnomAD_AF=0.6136;gnomAD_AFR_AF=0.1208;gnomAD_AMR_AF=0.7139;gnomAD_ASJ_AF=0.6344;gnomAD_EAS_AF=0.786;gnomAD_FIN_AF=0.5555;gnomAD_NFE_AF=0.595;gnomAD_OTH_AF=0.5882;gnomAD_SAS_AF=0.6625;MAX_AF=0.786;MAX_AF_POPS=gnomAD_EAS;CLIN_SIG=.;SOMATIC=.;PHENO=.;PUBMED=.;MOTIF_NAME=.;MOTIF_POS=.;HIGH_INF_POS=.;MOTIF_SCORE_CHANGE=.;existing_InFrame_oORFs=.;existing_OutOfFrame_oORFs=.;existing_uORFs=.;five_prime_UTR_variant_annotation=.;five_prime_UTR_variant_consequence=.;SpliceRegion=.;LoF=.;LoF_filter=.;LoF_flags=.;LoF_info=MUTANT_DONOR_MES:11.0784944978449&EXON_END:1000172&INTRON_END:999691&DE_NOVO_DONOR_MES_POS:-156&DE_NOVO_DONOR_POS:-156&EXON_START:999692&DE_NOVO_DONOR_MES:7.0006176073036&DE_NOVO_DONOR_PROB:0.167156964545091&INTRON_START:999614;SpliceAI_pred_DP_AG=6;SpliceAI_pred_DP_AL=-37;SpliceAI_pred_DP_DG=6;SpliceAI_pred_DP_DL=24;SpliceAI_pred_DS_AG=0;SpliceAI_pred_DS_AL=0;SpliceAI_pred_DS_DG=0.01;SpliceAI_pred_DS_DL=0.02;SpliceAI_pred_SYMBOL=HES4 1 12716215 . C A . . 
CSQ=A|stop_gained|HIGH|AADACL3|ENSG00000188984|Transcript|ENST00000359318|protein_coding|1/4||ENST00000359318.8:c.39C>A|ENSP00000352268.6:p.Cys13Ter|106|39|13|C/*|tgC/tgA|rs3000860||1||SNV|HGNC|HGNC:32037|YES|NM_001103170.3|3|P1|CCDS41253.2|ENSP00000352268|Q5VUY0||UPI0003EAEC70||||PIRSF:PIRSF037251&PANTHER:PTHR23024&PANTHER:PTHR23024:SF115&Transmembrane_helices:TMhelix|||0.1436|0.1188|0.1052|0.2679|0.1362|0.0838||||||||||||0.2679|EAS|||||||||||||||HC|||GERP_DIST:92.57575&BP_DIST:1182&PERCENTILE:0.0318627450980392&DIST_FROM_LAST_EXON:408&50_BP_RULE:PASS&PHYLOCSF_TOO_SHORT|||||||||,A|non_coding_transcript_exon_variant|MODIFIER|AADACL3|ENSG00000188984|Transcript|ENST00000620146|processed_transcript|1/3||ENST00000620146.2:n.101C>A||101|||||rs3000860||1||SNV|HGNC|HGNC:32037|||2|||||||||||||0.1436|0.1188|0.1052|0.2679|0.1362|0.0838||||||||||||0.2679|EAS|||||||||||||||||||||||||||,A|regulatory_region_variant|MODIFIER|||RegulatoryFeature|ENSR00000347677|promoter_flanking_region||||||||||rs3000860||||SNV||||||||||||||||||0.1436|0.1188|0.1052|0.2679|0.1362|0.0838||||||||||||0.2679|EAS|||||||||||||||||||||||||||;Allele=A;Consequence=stop_gained;IMPACT=HIGH;SYMBOL=AADACL3;Gene=ENSG00000188984;Feature_type=Transcript;Feature=ENST00000359318;BIOTYPE=protein_coding;EXON=1/4;INTRON=.;HGVSc=ENST00000359318.8:c.39C>A;HGVSp=ENSP00000352268.6:p.Cys13Ter;cDNA_position=106;CDS_position=39;Protein_position=13;Amino_acids=C/*;Codons=tgC/tgA;Existing_variation=rs3000860;DISTANCE=.;STRAND=1;FLAGS=.;VARIANT_CLASS=SNV;SYMBOL_SOURCE=HGNC;HGNC_ID=HGNC:32037;CANONICAL=YES;MANE=NM_001103170.3;TSL=3;APPRIS=P1;CCDS=CCDS41253.2;ENSP=ENSP00000352268;SWISSPROT=Q5VUY0;TREMBL=.;UNIPARC=UPI0003EAEC70;GENE_PHENO=.;SIFT=.;PolyPhen=.;DOMAINS=PIRSF:PIRSF037251&PANTHER:PTHR23024&PANTHER:PTHR23024:SF115&Transmembrane_helices:TMhelix;miRNA=.;HGVS_OFFSET=.;AF=0.1436;AFR_AF=0.1188;AMR_AF=0.1052;EAS_AF=0.2679;EUR_AF=0.1362;SAS_AF=0.0838;AA_AF=.;EA_AF=.;gnomAD_AF=.;gnomAD_AFR_AF=.;gnomAD_AMR_AF=.;gnomAD_ASJ_AF=.;gno
mAD_EAS_AF=.;gnomAD_FIN_AF=.;gnomAD_NFE_AF=.;gnomAD_OTH_AF=.;gnomAD_SAS_AF=.;MAX_AF=0.2679;MAX_AF_POPS=EAS;CLIN_SIG=.;SOMATIC=.;PHENO=.;PUBMED=.;MOTIF_NAME=.;MOTIF_POS=.;HIGH_INF_POS=.;MOTIF_SCORE_CHANGE=.;existing_InFrame_oORFs=.;existing_OutOfFrame_oORFs=.;existing_uORFs=.;five_prime_UTR_variant_annotation=.;five_prime_UTR_variant_consequence=.;SpliceRegion=.;LoF=HC;LoF_filter=.;LoF_flags=.;LoF_info=GERP_DIST:92.57575&BP_DIST:1182&PERCENTILE:0.0318627450980392&DIST_FROM_LAST_EXON:408&50_BP_RULE:PASS&PHYLOCSF_TOO_SHORT;SpliceAI_pred_DP_AG=.;SpliceAI_pred_DP_AL=.;SpliceAI_pred_DP_DG=.;SpliceAI_pred_DP_DL=.;SpliceAI_pred_DS_AG=.;SpliceAI_pred_DS_AL=.;SpliceAI_pred_DS_DG=.;SpliceAI_pred_DS_DL=.;SpliceAI_pred_SYMBOL=. diff --git a/test/split-vep.27.out b/test/split-vep.27.out new file mode 100644 index 000000000..3d55c8410 --- /dev/null +++ b/test/split-vep.27.out @@ -0,0 +1,2 @@ +69511 0.9995 +792480 1 diff --git a/test/test.pl b/test/test.pl index e2178a33b..a87c227eb 100755 --- a/test/test.pl +++ b/test/test.pl @@ -76,6 +76,8 @@ run_test(\&test_vcf_merge,$opts,in=>['merge.4.a','merge.4.b'],out=>'merge.4.out',args=>'--force-samples -m id'); run_test(\&test_vcf_merge,$opts,in=>['gvcf.merge.1','gvcf.merge.2','gvcf.merge.3'],out=>'gvcf.merge.1.out',args=>'--gvcf -'); run_test(\&test_vcf_merge,$opts,in=>['merge.gvcf.2.a','merge.gvcf.2.b','merge.gvcf.2.c'],out=>'merge.gvcf.2.out',args=>'--gvcf -',types=>['vcf']); +run_test(\&test_vcf_merge,$opts,in=>['merge.gvcf.2.a','merge.gvcf.2.b','merge.gvcf.2.c'],out=>'merge.gvcf.2.1.out',args=>'--gvcf - -m both,*',types=>['vcf']); +run_test(\&test_vcf_merge,$opts,in=>['merge.gvcf.2.a','merge.gvcf.2.b','merge.gvcf.2.c'],out=>'merge.gvcf.2.2.out',args=>'--gvcf - -m both,**',types=>['vcf']); run_test(\&test_vcf_merge,$opts,in=>['merge.gvcf.3.a','merge.gvcf.3.b'],out=>'merge.gvcf.3.out',args=>'--gvcf - -i SRC:join',types=>['vcf']); 
run_test(\&test_vcf_merge,$opts,in=>['merge.gvcf.4.a','merge.gvcf.4.b'],out=>'merge.gvcf.4.out',args=>'--gvcf -'); run_test(\&test_vcf_merge,$opts,in=>['merge.5.a','merge.5.b'],out=>'merge.5.out'); @@ -109,6 +111,8 @@ run_test(\&test_vcf_query,$opts,in=>'query.string',out=>'query.string.1.out',args=>q[-f '%CHROM\\t%POS\\t%CLNREVSTAT\\n' -i'CLNREVSTAT="criteria_provided,_conflicting_interpretations"']); run_test(\&test_vcf_query,$opts,in=>'query.string',out=>'query.string.1.out',args=>q[-f '%CHROM\\t%POS\\t%CLNREVSTAT\\n' -i'CLNREVSTAT="criteria_provided" || CLNREVSTAT="_conflicting_interpretations"']); run_test(\&test_vcf_query,$opts,in=>'query.string',out=>'query.string.2.out',args=>q[-f '%CHROM\\t%POS\\t%CLNREVSTAT\\n' -i'CLNREVSTAT="criteria_provided" && CLNREVSTAT="_conflicting_interpretations"']); +run_test(\&test_vcf_query,$opts,in=>'query.string.2',out=>'query.string.2.1.out',args=>q[-f '%CHROM\\t%POS\\t%INFO/STR\\n' -i'INFO/STR=@{PATH}/query.string.2.1.txt']); +run_test(\&test_vcf_query,$opts,in=>'query.string.2',out=>'query.string.2.2.out',args=>q[-f '%CHROM\\t%POS[\\t%STR]\\n' -i'FMT/STR=@{PATH}/query.string.2.2.txt']); run_test(\&test_vcf_query,$opts,in=>'query',out=>'query.out',args=>q[-f '%CHROM\\t%POS\\t%REF\\t%ALT\\t%DP4\\t%AN[\\t%GT\\t%TGT]\\n']); run_test(\&test_vcf_query,$opts,in=>'query.variantkey',out=>'query.variantkey.hex.out',args=>q[-f '%RSX\\t%VKX\\n']); run_test(\&test_vcf_query,$opts,in=>'view.filter',out=>'query.2.out',args=>q[-f'%XRI\\n' -i'XRI[*]>1111']); @@ -237,9 +241,9 @@ run_test(\&test_vcf_query,$opts,in=>'filter.12',out=>'query.89.out',args=>q[-i'FILTER~"A;B"' -f'%FILTER\\n']); run_test(\&test_vcf_query,$opts,in=>'filter.12',out=>'query.90.out',args=>q[-i'FILTER!~"A;B"' -f'%FILTER\\n']); run_test(\&test_vcf_query,$opts,in=>'filter.10',out=>'query.91.out',args=>q[-i'DP%10==2' -f'[ %DP]\\n']); +run_test(\&test_vcf_query,$opts,in=>'filter.13',out=>'query.99.out',args=>q[-i'REF="N"' -f'%CHROM %POS %REF %ALT %QUAL\\n']); 
run_test(\&test_vcf_query,$opts,in=>'query.header',out=>'query.95.out',args=>q[-H -f'[%CHROM %POS %SAMPLE %DP %GT\\n]']); -run_test(\&test_vcf_query,$opts,in=>'query.header',out=>'query.95.out',args=>q[-H -f'[%CHROM %POS %SAMPLE %DP %GT]']); -run_test(\&test_vcf_query,$opts,in=>'query.header',out=>'query.96.out',args=>q[-H -f'[%CHROM %POS %SAMPLE %DP %GT]\\n']); +run_test(\&test_vcf_query,$opts,in=>'query.header',out=>'query.96.out',args=>q[-H -f'[%CHROM %POS %SAMPLE %DP %GT\\t]\\n']); run_test(\&test_vcf_query,$opts,in=>'query.header',out=>'query.97.out',args=>q[-H -f'%CHROM %POS[ %SAMPLE %DP %GT]\\n']); run_test(\&test_vcf_query,$opts,in=>'query.header',out=>'query.97.out',args=>q[-H -f'%CHROM %POS[ %SAMPLE %DP %GT]']); run_test(\&test_vcf_query,$opts,in=>'query.header',out=>'query.98.out',args=>q[-H -f'%CHROM %POS[ %SAMPLE][ %DP][ %GT]\\n']); @@ -272,6 +276,7 @@ run_test(\&test_vcf_norm,$opts,in=>'norm.2',fai=>'norm.2',out=>'norm.2.out',args=>'-c s -a'); run_test(\&test_vcf_norm,$opts,in=>'norm.iupac',fai=>'norm.iupac',out=>'norm.iupac.out',args=>'-c s'); run_test(\&test_vcf_norm,$opts,in=>'norm.3',fai=>'norm.3',out=>'norm.3.out',args=>'-c s'); +run_test(\&test_vcf_norm,$opts,in=>'atomize.split.1',out=>'atomize.split.1.0.out',args=>'--atomize --old-rec-tag OLD_REC -m -any'); run_test(\&test_vcf_norm,$opts,in=>'atomize.split.1',out=>'atomize.split.1.1.out',args=>'--atomize --old-rec-tag OLD_REC'); run_test(\&test_vcf_norm,$opts,in=>'atomize.split.1',out=>'atomize.split.1.2.out',args=>'--atomize --atom-overlaps . 
--old-rec-tag OLD_REC'); run_test(\&test_vcf_norm,$opts,in=>'atomize.split.2',out=>'atomize.split.2.1.out',args=>'--atomize --old-rec-tag OLD_REC'); @@ -286,8 +291,11 @@ run_test(\&test_vcf_norm,$opts,in=>'norm.phased-split',out=>'norm.phased-split.1.out',args=>'-m -any'); run_test(\&test_vcf_norm,$opts,in=>'norm.phased-join',out=>'norm.phased-join.1.out',args=>'-m +any'); run_test(\&test_vcf_norm,$opts,in=>'norm.symbolic',fai=>'norm.symbolic',out=>'norm.symbolic.1.out',args=>'--old-rec-tag ORI'); +run_test(\&test_vcf_norm,$opts,in=>'norm.symbolic.2',fai=>'norm.symbolic',out=>'norm.symbolic.2.out',args=>'--old-rec-tag ORI'); run_test(\&test_vcf_norm,$opts,in=>'norm.right-align',fai=>'norm.right-align',out=>'norm.right-align.1.out',args=>'--old-rec-tag ORI'); run_test(\&test_vcf_norm,$opts,in=>'norm.right-align',fai=>'norm.right-align',out=>'norm.right-align.2.out',args=>'--old-rec-tag ORI -g {PATH}/norm.right-align.gff'); +run_test(\&test_vcf_view,$opts,in=>'merge.gvcf.2.a',out=>'merge.gvcf.2.a.1.out',args=>'-HA'); +run_test(\&test_vcf_view,$opts,in=>'merge.gvcf.2.a',out=>'merge.gvcf.2.a.2.out',args=>'-HAA'); run_test(\&test_vcf_view,$opts,in=>'weird-chr-names',out=>'weird-chr-names.1.out',args=>'',reg=>'-r 1'); run_test(\&test_vcf_view,$opts,in=>'weird-chr-names',out=>'weird-chr-names.1.out',args=>'',reg=>'-r 1:1-2'); run_test(\&test_vcf_view,$opts,in=>'weird-chr-names',out=>'weird-chr-names.1.out',args=>'',reg=>'-r 1:1,1:2'); @@ -357,6 +365,9 @@ run_test(\&test_vcf_view,$opts,in=>'view.sites',out=>'view.sites.1.out',args=>'',tgts=>'view.sites.txt'); run_test(\&test_vcf_view,$opts,in=>'view.sites',out=>'view.sites.1.out',args=>'',tgts=>'view.sites.txt.gz'); run_test(\&test_vcf_head,$opts,in=>'mpileup.2.vcf',in_nheaders=>22); +run_test(\&test_vcf_head2,$opts,in=>'mpileup.2',out=>'head.1.out',args=>'-s0'); +run_test(\&test_vcf_head2,$opts,in=>'mpileup.2',out=>'head.2.out',args=>'-s1'); +run_test(\&test_vcf_head2,$opts,in=>'mpileup.2',out=>'head.3.out',args=>'-s2 
-h2'); run_test(\&test_vcf_call,$opts,in=>'mpileup',out=>'mpileup.1.out',args=>'-mv'); run_test(\&test_vcf_call,$opts,in=>'mpileup',out=>'mpileup.2.out',args=>'-mg0'); run_test(\&test_vcf_call,$opts,in=>'mpileup',out=>'mpileup.3.out',args=>'-mv -S {PATH}/mpileup.3.samples'); @@ -507,6 +518,12 @@ run_test(\&test_vcf_annotate,$opts,in=>'annotate18.2',tab=>'annotate18.2',out=>'annotate18.2.out',args=>'-c CHROM,BEG,END,A,B,C,D,E -l A:sum,B:avg,C:min,D:max,E:append'); run_test(\&test_vcf_annotate,$opts,in=>'annotate19.dst',vcf=>'annotate19.src',out=>'annotate19.1.out',args=>'-c INFO/ID:=ID,INFO/INFO_ID:=INFO/ID,ID,=ID:=INFO/ID'); run_test(\&test_vcf_annotate,$opts,in=>'annotate19.dst',vcf=>'annotate19.src',out=>'annotate19.2.out',args=>'-c FILTER,INFO/FILTER:=FILTER,INFO/INFO_FILTER:=INFO/FILTER'); +run_test(\&test_vcf_annotate,$opts,in=>'annotate19.dst',vcf=>'annotate19.src',out=>'annotate19.3.out',args=>'-c INFO/FILTER:=FILTER,INFO/INFO_FILTER:=INFO/FILTER'); +run_test(\&test_vcf_annotate,$opts,in=>'annotate19.dst', out=>'annotate19.4.out',args=>'-c INFO/FILTER:=FILTER'); +run_test(\&test_vcf_annotate,$opts,in=>'annotate19.dst',vcf=>'annotate19.src',out=>'annotate19.5.out',args=>'-c INFO/FILTER:=FILTER'); +run_test(\&test_vcf_annotate,$opts,in=>'annotate19.dst',vcf=>'annotate19.src',out=>'annotate19.4.out',args=>'-c INFO/FILTER:=./FILTER'); +run_test(\&test_vcf_annotate,$opts,in=>'annotate19.dst',vcf=>'annotate19.src',out=>'annotate19.6.out',args=>'-c INFO/FILTER:=./FILTER,FILTER'); +run_test(\&test_vcf_annotate,$opts,in=>'annotate19.dst',vcf=>'annotate19.src',out=>'annotate19.7.out',args=>'-c FILTER,INFO/FILTER:=./FILTER'); run_test(\&test_vcf_annotate,$opts,in=>'annotate20.dst',vcf=>'annotate20.src',out=>'annotate20.1.out',args=>'-c FMT/GT'); run_test(\&test_vcf_annotate,$opts,in=>'annotate20.dst',vcf=>'annotate20.src',out=>'annotate20.2.out',args=>'-c +FMT/GT'); 
run_test(\&test_vcf_annotate,$opts,in=>'annotate20.dst',vcf=>'annotate20.src',out=>'annotate20.3.out',args=>'-c -FMT/GT'); @@ -539,6 +556,7 @@ run_test(\&test_vcf_annotate,$opts,in=>'annotate28',tab=>'annots28',out=>'annotate28.4.out',args=>'-c CHROM,POS,REF,ALT,FMT/TEST -s smpl2'); run_test(\&test_vcf_annotate,$opts,in=>'annotate',out=>'annotate.33.out',args=>'-m XXX'); run_test(\&test_vcf_annotate,$opts,in=>'annotate34',tab=>'annots34',out=>'annotate34.out',args=>q[-c CHROM,FROM,TO,INFO/END -H '##INFO=']); +run_test(\&test_vcf_annotate,$opts,in=>'annots-mark',bed=>'annots-mark',out=>'annots-mark.1.out',args=>q[-c CHROM,FROM,TO -m TAG]); run_test(\&test_vcf_plugin,$opts,in=>'checkploidy',out=>'checkploidy.out',cmd=>'+check-ploidy --no-version'); run_test(\&test_vcf_plugin,$opts,in=>'checkploidy.2',out=>'checkploidy.2.out',cmd=>'+check-ploidy --no-version'); run_test(\&test_vcf_plugin,$opts,in=>'checkploidy.2',out=>'checkploidy.3.out',cmd=>'+check-ploidy --no-version',args=>'-- -m'); @@ -690,6 +708,7 @@ run_test(\&test_vcf_plugin,$opts,in=>'split-vep.broken-LoF',out=>'split-vep.broken-LoF.out',cmd=>'+split-vep',args=>qq[-d -f '%CHROM:%POS %Consequence %LoF_info\\n' -a vep]); run_test(\&test_vcf_plugin,$opts,in=>'split-vep.broken-LoF',out=>'split-vep.broken-LoF.2.out',cmd=>'+split-vep',args=>qq[-d -f '%CHROM:%POS %LoF_info\\n' -a vep -i 'Consequence=="frameshift_variant"']); run_test(\&test_vcf_plugin,$opts,in=>'split-vep',out=>'split-vep.26.out',cmd=>'+split-vep',args=>qq[-f'%POS\\n' -i'SYMBOL~"SAMD11"']); +run_test(\&test_vcf_plugin,$opts,in=>'split-vep',out=>'split-vep.27.out',cmd=>'+split-vep',args=>qq[-f'%POS\\t%MAX_AF\\n' -i'MAX_AF>0.999' -c MAX_AF,MAX_AF:float,MAX_AF]); run_test(\&test_vcf_plugin,$opts,in=>'split-vep.filter',out=>'split-vep.filter.1.out',cmd=>'+split-vep',args=>qq[-s worst -i'CSQ~"nonsense"' -f '%POS %Consequence %Feature %BIOTYPE']); 
run_test(\&test_vcf_plugin,$opts,in=>'split-vep.filter',out=>'split-vep.filter.2.out',cmd=>'+split-vep',args=>qq[-s worst -i'CSQ~"nonsense"' -f '%POS %Consequence %Feature %BIOTYPE %CSQ']); run_test(\&test_vcf_plugin,$opts,in=>'parental-origin',out=>'parental-origin.1.out',cmd=>'+parental-origin',args=>qq[-r 20:100 -p proband,father,mother -t del | grep -v ^#]); @@ -912,6 +931,8 @@ run_test(\&test_gtcheck,$opts,in=>'gtcheck.ntop',gts=>'gtcheck.ntop.gts',out=>'gtcheck.ntop.1.out',args=>q[]); run_test(\&test_gtcheck,$opts,in=>'gtcheck.ntop',gts=>'gtcheck.ntop.gts',out=>'gtcheck.ntop.2.out',args=>q[--n-matches 2]); run_test(\&test_gtcheck,$opts,in=>'gtcheck.5',gts=>'gtcheck.5.gts',out=>'gtcheck.5.1.out',args=>q[],grep=>'grep -v Time'); +run_test(\&test_gtcheck,$opts,in=>'gtcheck.6',out=>'gtcheck.6.1.out',args=>q[-p A,B,B,C]); +run_test(\&test_gtcheck,$opts,in=>'gtcheck.3',out=>'gtcheck.3.1.out',args=>q[-t 11:33 -p A,D,A,E,D,E -u GT -e 10]); print "\nNumber of tests:\n"; printf " total .. %d\n", $$opts{nok}+$$opts{nfailed}; @@ -1431,8 +1452,11 @@ sub test_vcf_norm my ($opts,%args) = @_; bgzip_tabix_vcf($opts,$args{in}); my $params = ''; - $args{args} =~ s/{PATH}/$$opts{path}/g; - if ( exists($args{args}) ) { $params .= " $args{args}"; } + if ( exists($args{args}) ) + { + $args{args} =~ s/{PATH}/$$opts{path}/g; + $params .= " $args{args}"; + } if ( exists($args{fai} ) ) { $params .= " -f $$opts{path}/$args{fai}.fa"; } test_cmd($opts,%args,cmd=>"$$opts{bin}/bcftools norm --no-version $params $$opts{tmp}/$args{in}.vcf.gz",exp_fix=>1); test_cmd($opts,%args,cmd=>"$$opts{bin}/bcftools norm -Ob $params $$opts{tmp}/$args{in}.vcf.gz | $$opts{bin}/bcftools view | grep -v ^##bcftools_",exp_fix=>1); @@ -1507,6 +1531,13 @@ sub test_vcf_head test_cmd($opts, %args, gen_head_output(5, 5, "fiveboth", $infile), cmd => "$$opts{bin}/bcftools head -h 5 -n 5 < $infile"); } +sub test_vcf_head2 +{ + my ($opts,%args) = @_; + bgzip_tabix_vcf($opts,$args{in}); + 
test_cmd($opts,%args,cmd=>"$$opts{bin}/bcftools head $args{args} $$opts{tmp}/$args{in}.vcf.gz"); + test_cmd($opts,%args,cmd=>"$$opts{bin}/bcftools view --no-version -Ob $$opts{tmp}/$args{in}.vcf.gz | $$opts{bin}/bcftools head $args{args}"); +} sub test_vcf_call { my ($opts,%args) = @_; @@ -1683,6 +1714,13 @@ sub test_vcf_annotate $in_fname = "$$opts{path}/$args{in}.vcf"; $hdr = (-e "$$opts{path}/$args{in}.hdr" && !($args{args}=~/-H/) && !($args{args}=~/--header-line\s/)) ? "-h $$opts{path}/$args{in}.hdr" : ''; } + elsif ( exists($args{bed}) ) + { + bgzip_tabix($opts,file=>$args{bed},suffix=>'bed',args=>'-p bed'); + $annot_fname = "-a $$opts{tmp}/$args{bed}.bed.gz"; + $in_fname = "$$opts{path}/$args{in}.vcf"; + $hdr = (-e "$$opts{path}/$args{in}.hdr" && !($args{args}=~/-H/) && !($args{args}=~/--header-line\s/)) ? "-h $$opts{path}/$args{in}.hdr" : ''; + } elsif ( exists($args{vcf}) ) { bgzip_tabix_vcf($opts,"$args{in}"); diff --git a/test/trio-dnm/trio-dnm.9.2.out b/test/trio-dnm/trio-dnm.9.2.out index d54e8275d..b3f3a3d67 100644 --- a/test/trio-dnm/trio-dnm.9.2.out +++ b/test/trio-dnm/trio-dnm.9.2.out @@ -10,6 +10,6 @@ 1 . . 1 . . 1 . . - 1 . . + . . . . . . . . . diff --git a/vcfannotate.c b/vcfannotate.c index b2e39ef7b..857a205f0 100644 --- a/vcfannotate.c +++ b/vcfannotate.c @@ -170,6 +170,7 @@ typedef struct _args_t int argc, drop_header, record_cmd_line, tgts_is_vcf, mark_sites_logic, force, single_overlaps; int columns_is_file, has_append_mode, pair_logic; dbuf_t *header_lines; + bcf1_t *current_rec; // current record for local setters } args_t; @@ -510,17 +511,21 @@ static int vcf_getter_info_str2str(args_t *args, bcf1_t *rec, annot_col_t *col, static int vcf_getter_id2str(args_t *args, bcf1_t *rec, annot_col_t *col, void **ptr, int *mptr) { char *str = *((char**)ptr); - int len = strlen(rec->d.id); + int i, len = strlen(rec->d.id); if ( len >= *mptr ) str = realloc(str, len+1); - strcpy(str, rec->d.id); + for (i=0; id.id[i]==';' ? 
',' : rec->d.id[i]; + str[len] = 0; *((char**)ptr) = str; *mptr = len+1; return len; } -static int vcf_getter_filter2str(args_t *args, bcf1_t *rec, annot_col_t *col, void **ptr, int *mptr) +inline static int vcf_getter_filter2str_core(bcf_hdr_t *hdr, bcf1_t *rec, char **ptr, int *mptr) { + if ( !(rec->unpacked & BCF_UN_FLT) ) bcf_unpack(rec, BCF_UN_FLT); + kstring_t str; - str.s = *((char**)ptr); + str.s = *ptr; str.m = *mptr; str.l = 0; @@ -529,16 +534,24 @@ static int vcf_getter_filter2str(args_t *args, bcf1_t *rec, annot_col_t *col, vo { for (i=0; id.n_flt; i++) { - if (i) kputc(';', &str); - kputs(bcf_hdr_int2id(args->tgts_hdr,BCF_DT_ID,rec->d.flt[i]), &str); + if (i) kputc(',', &str); + kputs(bcf_hdr_int2id(hdr,BCF_DT_ID,rec->d.flt[i]), &str); } } else kputc('.', &str); - *((char**)ptr) = str.s; + *ptr = str.s; *mptr = str.m; return str.l; } +static int vcf_getter_filter2str_local(args_t *args, bcf1_t *rec, annot_col_t *col, void **ptr, int *mptr) +{ + return vcf_getter_filter2str_core(args->hdr_out, args->current_rec, (char**)ptr, mptr); +} +static int vcf_getter_filter2str(args_t *args, bcf1_t *rec, annot_col_t *col, void **ptr, int *mptr) +{ + return vcf_getter_filter2str_core(args->tgts_hdr, rec, (char**)ptr, mptr); +} static int setter_filter(args_t *args, bcf1_t *line, annot_col_t *col, void *data) { if ( !data ) error("Error: the --merge-logic option cannot be used with FILTER (yet?)\n"); @@ -2290,10 +2303,30 @@ static void init_columns(args_t *args) if ( bcf_hdr_id2type(args->tgts_hdr,BCF_HL_INFO,hdr_id)!=BCF_HT_STR ) error("Only Type=String tags can be used to annotate the ID column\n"); } - else if ( (ptr=strstr(str.s,":=")) && !args->targets_fname ) + else if ( (ptr=strstr(str.s,":=")) && (!args->targets_fname || !strncasecmp(ptr+2,"./",2)) ) { *ptr = 0; - rename_annots_push(args,ptr+2,str.s); + if ( !strncasecmp(str.s,"INFO/",5) && (!strcasecmp(ptr+2,"FILTER") || !strcasecmp(ptr+2,"./FILTER")) ) + { + // -a not present and transferring filter, 
needs to be a local transfer + args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols); + annot_col_t *col = &args->cols[args->ncols-1]; + memset(col,0,sizeof(*col)); + col->icol = icol; + col->replace = replace; + col->setter = vcf_setter_info_str; + col->getter = vcf_getter_filter2str_local; + col->hdr_key_src = strdup(ptr+2); + col->hdr_key_dst = strdup(str.s+5); + tmp.l = 0; + ksprintf(&tmp,"##INFO=",col->hdr_key_dst); + bcf_hdr_append(args->hdr_out, tmp.s); + if (bcf_hdr_sync(args->hdr_out) < 0) error_errno("[%s] Failed to update header", __func__); + int hdr_id = bcf_hdr_id2int(args->hdr_out, BCF_DT_ID, col->hdr_key_dst); + col->number = bcf_hdr_id2length(args->hdr_out,BCF_HL_INFO,hdr_id); + } + else + rename_annots_push(args,ptr+2,str.s); *ptr = ':'; } else if ( !strcasecmp("FILTER",str.s) ) @@ -2487,6 +2520,13 @@ static void init_columns(args_t *args) " (the annotation type is modified to \"Number=.\" and allele ordering is disregarded)\n"); fprintf(stderr,"Warning: the =INFO/TAG feature modifies the annotation to \"Number=.\" and disregards allele ordering\n"); } + + args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols); + annot_col_t *col = &args->cols[args->ncols-1]; + memset(col,0,sizeof(*col)); + col->icol = icol; + col->replace = replace; + int explicit_src_info = 0; int explicit_dst_info = 0; char *key_dst; @@ -2517,15 +2557,14 @@ static void init_columns(args_t *args) key_src[-2] = ':'; error("Did you mean \"FMT/%s\" rather than \"%s\"?\n",str.s,str.s); } + else if ( !strcasecmp("FILTER",key_src) && args->tgts_is_vcf ) + { + col->getter = vcf_getter_filter2str; + } } else key_src = key_dst; - args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols); - annot_col_t *col = &args->cols[args->ncols-1]; - memset(col,0,sizeof(*col)); - col->icol = icol; - col->replace = replace; col->hdr_key_src = strdup(key_src); col->hdr_key_dst = 
strdup(key_dst); @@ -2782,7 +2821,7 @@ static void rename_annots(args_t *args) while ( *ptr && isspace(*ptr) ) ptr++; if ( !*ptr ) { *rmme = ' '; error("Could not parse: %s\n", args->rename_annots_map[i]); } if ( rename_annots_core(args, args->rename_annots_map[i], ptr) < 0 ) - error("Could not parse \"%s %s\", expected INFO, FORMAT, or FILTER prefix\n",args->rename_annots_map[i],ptr); + error("Cannot rename \"%s\" to \"%s\"\n",args->rename_annots_map[i],ptr); } } static void rename_annots_push(args_t *args, char *src, char *dst) @@ -3084,6 +3123,8 @@ static int strstr_match(char *a, char *b) } static void annotate(args_t *args, bcf1_t *line) { + args->current_rec = line; + int i, j; for (i=0; inrm; i++) args->rm[i].handler(args, line, &args->rm[i]); @@ -3111,6 +3152,12 @@ static void annotate(args_t *args, bcf1_t *line) if ( args->min_overlap_vcf && args->min_overlap_vcf > (float)isec/len_vcf ) continue; parse_annot_line(args, regitr_payload(args->tgt_itr,char*), tmp); + + // If a plain BED file is provided and we are asked to just mark overlapping sites, there are + // no additional columns. 
Not sure if there can be any side effects for ill-formatted BED files + // with variable number of columns + if ( !args->ncols && args->mark_sites ) has_overlap = 1; + for (j=0; jncols; j++) { if ( args->cols[j].done==1 ) continue; @@ -3280,6 +3327,15 @@ static void annotate(args_t *args, bcf1_t *line) has_overlap = 1; } } + else if ( args->ncols ) + { + for (j=0; jncols; j++) + { + if ( !args->cols[j].setter ) continue; + if ( args->cols[j].setter(args,line,&args->cols[j],NULL) ) + error("fixme: Could not set %s at %s:%"PRId64"\n", args->cols[j].hdr_key_src,bcf_seqname(args->hdr,line),(int64_t) line->pos+1); + } + } if ( args->set_ids ) { args->tmpks.l = 0; diff --git a/vcfcall.c b/vcfcall.c index d2f6e2c5f..e67789d9a 100644 --- a/vcfcall.c +++ b/vcfcall.c @@ -910,6 +910,7 @@ static void usage(args_t *args) fprintf(stderr, "\n"); fprintf(stderr, "Input/output options:\n"); fprintf(stderr, " -A, --keep-alts Keep all possible alternate alleles at variant sites\n"); + fprintf(stderr, " -*, --keep-unseen-allele Keep the unobserved allele <*> or \n"); fprintf(stderr, " -a, --annotate LIST Optional tags to output (lowercase allowed); '?' 
to list available tags\n"); fprintf(stderr, " -F, --prior-freqs AN,AC Use prior allele frequencies, determined from these pre-filled tags\n"); fprintf(stderr, " -G, --group-samples FILE|- Group samples by population (file with \"sample\\tgroup\") or \"-\" for single-sample calling.\n"); @@ -987,6 +988,7 @@ int main_vcfcall(int argc, char *argv[]) {"targets-file",required_argument,NULL,'T'}, {"threads",required_argument,NULL,9}, {"keep-alts",no_argument,NULL,'A'}, + {"keep-unseen-allele",no_argument,NULL,'*'}, {"insert-missed",no_argument,NULL,'i'}, {"skip-Ns",no_argument,NULL,'N'}, // now the new default {"keep-masked-refs",no_argument,NULL,'M'}, @@ -1008,7 +1010,7 @@ int main_vcfcall(int argc, char *argv[]) }; char *tmp = NULL; - while ((c = getopt_long(argc, argv, "h?o:O:r:R:s:S:t:T:ANMV:vcmp:C:n:P:f:a:ig:XYF:G:", loptions, NULL)) >= 0) + while ((c = getopt_long(argc, argv, "h?o:O:r:R:s:S:t:T:A*NMV:vcmp:C:n:P:f:a:ig:XYF:G:", loptions, NULL)) >= 0) { switch (c) { @@ -1026,6 +1028,7 @@ int main_vcfcall(int argc, char *argv[]) case 'M': args.flag &= ~CF_ACGT_ONLY; break; // keep sites where REF is N case 'N': args.flag |= CF_ACGT_ONLY; break; // omit sites where first base in REF is N (the new default) case 'A': args.aux.flag |= CALL_KEEPALT; break; + case '*': args.aux.flag |= CALL_KEEP_UNSEEN; break; case 'c': args.flag |= CF_CCALL; break; // the original EM based calling method case 'i': args.flag |= CF_INS_MISSED; break; case 'v': args.aux.flag |= CALL_VARONLY; break; diff --git a/vcfgtcheck.c b/vcfgtcheck.c index 561be62a5..f21f7cf05 100644 --- a/vcfgtcheck.c +++ b/vcfgtcheck.c @@ -39,11 +39,17 @@ THE SOFTWARE. */ #include #include #include +#include #include #include #include "bcftools.h" #include "extsort.h" //#include "hclust.h" +#include "filter.h" + +// Logic of the filters: include or exclude sites which match the filters? 
+#define FLT_INCLUDE 1 +#define FLT_EXCLUDE 2 typedef struct { @@ -56,20 +62,22 @@ typedef struct bcf_srs_t *files; // first reader is the query VCF - single sample normally or multi-sample for cross-check bcf_hdr_t *gt_hdr, *qry_hdr; // VCF with genotypes to compare against and the query VCF char *cwd, **argv, *gt_samples, *qry_samples, *regions, *targets, *qry_fname, *gt_fname, *pair_samples; - int argc, gt_samples_is_file, qry_samples_is_file, regions_is_file, targets_is_file, pair_samples_is_file; - int regions_overlap, targets_overlap; + char *output_fname; + int argc, gt_samples_is_file, qry_samples_is_file, regions_is_file, targets_is_file, pair_samples_is_file, output_type;; + int regions_overlap, targets_overlap, clevel; int qry_use_GT,gt_use_GT, nqry_smpl,ngt_smpl, *qry_smpl,*gt_smpl; int nused[2][2]; double *pdiff, *qry_prob, *gt_prob; - uint32_t *ndiff,*ncnt,ncmp, npairs; + uint32_t *ndiff,*ncnt,*nmatch,ncmp, npairs; int32_t *qry_arr,*gt_arr, nqry_arr,ngt_arr; uint8_t *qry_dsg, *gt_dsg; pair_t *pairs; double *hwe_prob, dsg2prob[8][3], pl2prob[256]; double min_inter_err, max_intra_err; int all_sites, hom_only, ntop, cross_check, calc_hwe_prob, sort_by_hwe, dry_run, use_PLs; - FILE *fp; - unsigned int nskip_no_match, nskip_not_ba, nskip_mono, nskip_no_data, nskip_dip_GT, nskip_dip_PL; + BGZF *out_fh; + unsigned int nskip_no_match, nskip_not_ba, nskip_mono, nskip_no_data, nskip_dip_GT, nskip_dip_PL, nskip_filter; + kstring_t kstr; // for --distinctive-sites double distinctive_sites; @@ -77,6 +85,11 @@ typedef struct size_t diff_sites_size; extsort_t *es; char *es_tmp_prefix, *es_max_mem; + + // include or exclude sites which match the filters + filter_t *qry_filter, *gt_filter; + char *qry_filter_str, *gt_filter_str; + int qry_filter_logic, gt_filter_logic; // FLT_INCLUDE or FLT_EXCLUDE } args_t; @@ -94,15 +107,17 @@ static void set_cwd(args_t *args) } assert(buf); } -static void print_header(args_t *args, FILE *fp) +static void print_header(args_t *args) 
{ - fprintf(fp, "# This file was produced by bcftools (%s+htslib-%s), the command line was:\n", bcftools_version(), hts_version()); - fprintf(fp, "# \t bcftools %s ", args->argv[0]); + args->kstr.l = 0; + ksprintf(&args->kstr, "# This file was produced by bcftools (%s+htslib-%s), the command line was:\n", bcftools_version(), hts_version()); + ksprintf(&args->kstr, "# \t bcftools %s ", args->argv[0]); int i; for (i=1; iargc; i++) - fprintf(fp, " %s",args->argv[i]); - fprintf(fp, "\n# and the working directory was:\n"); - fprintf(fp, "# \t %s\n#\n", args->cwd); + ksprintf(&args->kstr, " %s",args->argv[i]); + ksprintf(&args->kstr, "\n# and the working directory was:\n"); + ksprintf(&args->kstr, "# \t %s\n#\n", args->cwd); + if ( bgzf_write(args->out_fh, args->kstr.s, args->kstr.l)!=args->kstr.l ) error("Failed to write to %s\n", args->output_fname); } static int cmp_int(const void *_a, const void *_b) @@ -262,6 +277,9 @@ static void init_data(args_t *args) if ( !bcf_hdr_nsamples(args->gt_hdr) ) error("No samples in %s?\n", args->gt_fname); } + if ( args->gt_hdr && args->gt_filter_str ) args->gt_filter = filter_init(args->gt_hdr, args->gt_filter_str); + if ( args->qry_hdr && args->qry_filter_str ) args->qry_filter = filter_init(args->qry_hdr, args->qry_filter_str); + // Determine whether GT or PL will be used if ( args->qry_use_GT==-1 ) // not set by -u, qry uses PL by default { @@ -377,50 +395,70 @@ static void init_data(args_t *args) args->qry_prob = (double*) malloc(3*args->nqry_smpl*sizeof(*args->qry_prob)); args->gt_prob = args->cross_check ? args->qry_prob : (double*) malloc(3*args->ngt_smpl*sizeof(*args->gt_prob)); + // Convert genotypes to genotype likelihoods given by -E, the probability of reading one allele incorrectly. In this + // simple model we have: + // - probability of reading an allele incorrectly, eg. 
0 as 1 or 1 as 0 + // P(0|1) = P(1|0) = e + // - probability of genotype G={00,01,11} being correct given observed dosage {0,1,2} and the + // genotyping error probability e: + // P(00|0) = 1 P(00|1) = e P(00|2) = e^2 + // P(01|0) = e P(01|1) = 1 P(01|2) = e + // P(11|0) = e^2 P(11|1) = e P(11|2) = 1 + // // dsg2prob: the first index is bitmask of 8 possible dsg combinations (only 1<<0,1<<2,1<<3 are set, accessing - // anything else indicated an error, this is just to reuse gt_to_dsg()); the second index are the corresponding + // anything else indicated an error, this is just to reuse gt_to_dsg(); the second index are the corresponding // probabilities of 0/0, 0/1, and 1/1 genotypes + // for (i=0; i<8; i++) for (j=0; j<3; j++) args->dsg2prob[i][j] = HUGE_VAL; - args->dsg2prob[1][0] = -log(1-pow(10,-0.1*args->use_PLs)); - args->dsg2prob[1][1] = -log(0.5*pow(10,-0.1*args->use_PLs)); - args->dsg2prob[1][2] = -log(0.5*pow(10,-0.1*args->use_PLs)); - args->dsg2prob[2][0] = -log(0.5*pow(10,-0.1*args->use_PLs)); - args->dsg2prob[2][1] = -log(1-pow(10,-0.1*args->use_PLs)); - args->dsg2prob[2][2] = -log(0.5*pow(10,-0.1*args->use_PLs)); - args->dsg2prob[4][0] = -log(0.5*pow(10,-0.1*args->use_PLs)); - args->dsg2prob[4][1] = -log(0.5*pow(10,-0.1*args->use_PLs)); - args->dsg2prob[4][2] = -log(1-pow(10,-0.1*args->use_PLs)); + double eprob = pow(10,-0.1*args->use_PLs); // convert from phred score to probability + args->dsg2prob[1][0] = 0; // P(00|0) = 1 + args->dsg2prob[1][1] = -log(eprob); // P(01|0) = e + args->dsg2prob[1][2] = -2*log(eprob); // P(11|0) = e^2 + args->dsg2prob[2][0] = -log(eprob); // P(00|1) = e + args->dsg2prob[2][1] = 0; // P(01|1) = 1 + args->dsg2prob[2][2] = -log(eprob); // P(11|1) = e + args->dsg2prob[4][0] = -2*log(eprob); // P(00|2) = e^2 + args->dsg2prob[4][1] = -log(eprob); // P(01|2) = e + args->dsg2prob[4][2] = 0; // P(11|2) = 1 // lookup table to avoid exponentiation for (i=0; i<256; i++) args->pl2prob[i] = pow(10,-0.1*i); } else args->ndiff = 
(uint32_t*) calloc(args->npairs,sizeof(*args->ndiff)); // number of differing genotypes for each pair of samples - args->ncnt = (uint32_t*) calloc(args->npairs,sizeof(*args->ncnt)); // number of comparisons performed (non-missing data) + args->ncnt = (uint32_t*) calloc(args->npairs,sizeof(*args->ncnt)); // number of comparisons performed (non-missing data) if ( !args->ncnt ) error("Error: failed to allocate %.1f Mb\n", args->npairs*sizeof(*args->ncnt)/1e6); if ( args->calc_hwe_prob ) { // prob of the observed sequence of matches given site AFs and HWE args->hwe_prob = (double*) calloc(args->npairs,sizeof(*args->hwe_prob)); if ( !args->hwe_prob ) error("Error: failed to allocate %.1f Mb. Run with --no-HWE-prob to save some memory.\n", args->npairs*sizeof(*args->hwe_prob)/1e6); + args->nmatch = (uint32_t*) calloc(args->npairs,sizeof(*args->ncnt)); // number of matches, used only with calc_hwe_prob + if ( !args->nmatch ) error("Error: failed to allocate %.1f Mb.\n", args->npairs*sizeof(*args->ncnt)/1e6); } if ( args->distinctive_sites ) diff_sites_init(args); - args->fp = stdout; - print_header(args, args->fp); + args->out_fh = bgzf_open(args->output_fname, args->output_type&FT_GZ ? "wg" : "wu"); + if ( args->out_fh == NULL ) + error("[%s] Error: cannot write to %s: %s\n", __func__,args->output_fname ? args->output_fname : "standard output", strerror(errno)); + + print_header(args); } static void destroy_data(args_t *args) { + free(args->kstr.s); + if ( args->gt_filter ) filter_destroy(args->gt_filter); + if ( args->qry_filter ) filter_destroy(args->qry_filter); if ( args->gt_dsg!=args->qry_dsg ) free(args->gt_dsg); free(args->qry_dsg); if ( args->gt_prob!=args->qry_prob ) free(args->gt_prob); free(args->qry_prob); free(args->es_max_mem); - fclose(args->fp); + if ( bgzf_close(args->out_fh)!=0 ) error("Error: close failed .. 
%s\n", args->output_fname?args->output_fname:"stdout"); if ( args->distinctive_sites ) diff_sites_destroy(args); free(args->hwe_prob); free(args->cwd); @@ -429,6 +467,7 @@ static void destroy_data(args_t *args) free(args->pdiff); free(args->ndiff); free(args->ncnt); + free(args->nmatch); free(args->qry_smpl); if ( args->gt_smpl!=args->qry_smpl ) free(args->gt_smpl); free(args->pairs); @@ -538,6 +577,13 @@ static void process_line(args_t *args) int i,j,k, nqry1, ngt1, ret; bcf1_t *gt_rec = NULL, *qry_rec = bcf_sr_get_line(args->files,0); // the query file + if ( args->qry_filter ) + { + int pass = filter_test(args->qry_filter, qry_rec, NULL); + if ( args->qry_filter_logic==FLT_EXCLUDE ) pass = pass ? 0 : 1; + if ( !pass ) { args->nskip_filter++; return; } + } + int qry_use_GT = args->qry_use_GT; int gt_use_GT = args->gt_use_GT; @@ -547,6 +593,12 @@ static void process_line(args_t *args) if ( args->gt_hdr ) { gt_rec = bcf_sr_get_line(args->files,1); + if ( args->gt_filter ) + { + int pass = filter_test(args->gt_filter, gt_rec, NULL); + if ( args->gt_filter_logic==FLT_EXCLUDE ) pass = pass ? 0 : 1; + if ( !pass ) { args->nskip_filter++; return; } + } ret = set_data(args, args->gt_hdr, gt_rec, &args->gt_arr, &args->ngt_arr, &ngt1, >_use_GT); if ( ret<0 ) return; } @@ -560,7 +612,7 @@ static void process_line(args_t *args) args->ncmp++; args->nused[qry_use_GT][gt_use_GT]++; - double af,hwe_dsg[8]; + double hwe_dsg[8]; if ( args->calc_hwe_prob ) { int ac[2]; @@ -570,18 +622,28 @@ static void process_line(args_t *args) } else if ( bcf_calc_ac(args->qry_hdr, qry_rec, ac, BCF_UN_INFO|BCF_UN_FMT)!=1 ) error("todo: bcf_calc_ac() failed\n"); - // hwe indexes correspond to the bitmask of eight dsg combinations to account for PL uncertainty - // for in the extreme case we can have uninformative PL=0,0,0. So the values are the minima of e.g. - // hwe[1,2,4] .. dsg=0,1,2 - // hwe[3] .. dsg=0 or 1 - // hwe[6] .. 
dsg=1 or 2 - - double hwe[3]; - const double min_af = 1e-5; // cap the AF in case we get unrealistic values - af = (double)ac[1]/(ac[0]+ac[1]); - hwe[0] = af>min_af ? -log(af*af) : -log(min_af*min_af); - hwe[1] = af>min_af && af<1-min_af ? -log(2*af*(1-af)) : -log(2*min_af*(1-min_af)); - hwe[2] = af<(1-min_af) ? -log((1-af)*(1-af)) : -log(min_af*min_af); + // Calculate HWE probability for each possible qry+gt dosage combination. The alternate allele dosage + // values returned by gt_to_prob() below are 0,1,2,4 (0=missing, 1<<0, 1<<1, 1<<2). We consider only + // biallelic sites, therefore we work with eight genotype combinations. + // + // The array hwe_dsg is accessed with hwe_dsg[qry_dsg & gt_dsg] and is constructed to account for PL uncertainty + // when we encounter less informative PL, such as PL=0,0,10, where multiple dosage values are equally + // likely. If we allowed complete uncertainty (PL=0,0,0), we'd have up to eight possible genotype + // mask combinations: from e.g. 0=(gt_dsg=1<<0 & qry_dsg=1<<1) to 7=(gt_dsg=1<<0|1<<1|1<<2 & qry_dsg=1<<0|1<<1|1<<2). + // Note the extreme case of 1|2|4 is skipped, see pl_to_dsg(). + // + // When the dosage is uncertain, we take the minimum of their corresponding HWE value, for example + // hwe[0] = 0 + // hwe[1] = (1-AF)**2 + // hwe[2] = 2*AF*(1-AF) + // hwe[4] = AF**2 + // hwe[3] = min{hwe[1],hwe[2]} + + double hwe[3]; // while hwe_dsg iterates over dsg bitmasks (0..7), hwe iterates over dsg (0,1,2) + double af = ac[0]+ac[1] ? 
(double)ac[1]/(ac[0]+ac[1]) : 1e-6; + hwe[0] = -log((1-af)*(1-af)); + hwe[1] = -log(2*af*(1-af)); + hwe[2] = -log(af*af); hwe_dsg[0] = 0; for (i=1; i<8; i++) { @@ -621,7 +683,11 @@ static void process_line(args_t *args) args->ndiff[i]++; if ( args->kbs_diff ) { ndiff++; kbs_insert(args->kbs_diff, i); } } - else if ( args->calc_hwe_prob ) args->hwe_prob[i] += hwe_dsg[match]; + else if ( args->calc_hwe_prob ) + { + args->hwe_prob[i] += hwe_dsg[match]; + args->nmatch[i]++; + } args->ncnt[i]++; } @@ -655,6 +721,7 @@ static void process_line(args_t *args) { int match = qry_dsg & gt_dsg; args->hwe_prob[i] += hwe_dsg[match]; + args->nmatch[i]++; } args->ncnt[i]++; } @@ -690,7 +757,11 @@ static void process_line(args_t *args) if ( !args->gt_dsg[j] ) { idx++; continue; } // missing value int match = args->qry_dsg[i] & args->gt_dsg[j]; if ( !match ) args->ndiff[idx]++; - else if ( args->calc_hwe_prob ) args->hwe_prob[idx] += hwe_dsg[match]; + else if ( args->calc_hwe_prob ) + { + args->hwe_prob[idx] += hwe_dsg[match]; + args->nmatch[idx]++; + } args->ncnt[idx]++; idx++; } @@ -731,6 +802,7 @@ static void process_line(args_t *args) { int match = args->qry_dsg[i] & args->gt_dsg[j]; args->hwe_prob[idx] += hwe_dsg[match]; + args->nmatch[idx]++; } args->ncnt[idx]++; idx++; @@ -758,12 +830,13 @@ static void report_distinctive_sites(args_t *args) { extsort_sort(args->es); - fprintf(args->fp,"# DS, distinctive sites:\n"); - fprintf(args->fp,"# - chromosome\n"); - fprintf(args->fp,"# - position\n"); - fprintf(args->fp,"# - cumulative number of pairs distinguished by this block\n"); - fprintf(args->fp,"# - block id\n"); - fprintf(args->fp,"#DS\t[2]Chromosome\t[3]Position\t[4]Cumulative number of distinct pairs\t[5]Block id\n"); + args->kstr.l = 0; + ksprintf(&args->kstr,"# DS, distinctive sites:\n"); + ksprintf(&args->kstr,"# - chromosome\n"); + ksprintf(&args->kstr,"# - position\n"); + ksprintf(&args->kstr,"# - cumulative number of pairs distinguished by this block\n"); + 
ksprintf(&args->kstr,"# - block id\n"); + ksprintf(&args->kstr,"#DS\t[2]Chromosome\t[3]Position\t[4]Cumulative number of distinct pairs\t[5]Block id\n"); kbitset_t *kbs_blk = kbs_init(args->npairs); kbitset_iter_t itr; @@ -783,7 +856,9 @@ static void report_distinctive_sites(args_t *args) if ( ndiff_dbg!=ndiff ) error("Corrupted data, fixme: %d vs %d\n",ndiff_dbg,ndiff); if ( !ndiff_new ) continue; // no new pair distinguished by this site ndiff_tot += ndiff_new; - fprintf(args->fp,"DS\t%s\t%d\t%d\t%d\n",bcf_hdr_id2name(args->qry_hdr,rid),pos+1,ndiff_tot,iblock); + args->kstr.l = 0; + ksprintf(&args->kstr,"DS\t%s\t%d\t%d\t%d\n",bcf_hdr_id2name(args->qry_hdr,rid),pos+1,ndiff_tot,iblock); + if ( bgzf_write(args->out_fh, args->kstr.s, args->kstr.l)!=args->kstr.l ) error("Failed to write to %s\n", args->output_fname); if ( ndiff_tot < ndiff_min ) continue; // fewer than the requested number of pairs can be distinguished at this point iblock++; ndiff_tot = 0; @@ -793,24 +868,35 @@ static void report_distinctive_sites(args_t *args) } static void report(args_t *args) { - fprintf(args->fp,"INFO\tsites-compared\t%u\n",args->ncmp); - fprintf(args->fp,"INFO\tsites-skipped-no-match\t%u\n",args->nskip_no_match); - fprintf(args->fp,"INFO\tsites-skipped-multiallelic\t%u\n",args->nskip_not_ba); - fprintf(args->fp,"INFO\tsites-skipped-monoallelic\t%u\n",args->nskip_mono); - fprintf(args->fp,"INFO\tsites-skipped-no-data\t%u\n",args->nskip_no_data); - fprintf(args->fp,"INFO\tsites-skipped-GT-not-diploid\t%u\n",args->nskip_dip_GT); - fprintf(args->fp,"INFO\tsites-skipped-PL-not-diploid\t%u\n",args->nskip_dip_PL); - fprintf(args->fp,"INFO\tsites-used-PL-vs-PL\t%u\n",args->nused[0][0]); - fprintf(args->fp,"INFO\tsites-used-PL-vs-GT\t%u\n",args->nused[0][1]); - fprintf(args->fp,"INFO\tsites-used-GT-vs-PL\t%u\n",args->nused[1][0]); - fprintf(args->fp,"INFO\tsites-used-GT-vs-GT\t%u\n",args->nused[1][1]); - fprintf(args->fp,"# DC, discordance:\n"); - fprintf(args->fp,"# - query sample\n"); 
- fprintf(args->fp,"# - genotyped sample\n"); - fprintf(args->fp,"# - discordance (either an abstract score or number of mismatches, see -e/-u in the man page for details; smaller is better)\n"); - fprintf(args->fp,"# - negative log of HWE probability at matching sites (rare genotypes matches are more informative, bigger is better)\n"); - fprintf(args->fp,"# - number of sites compared (bigger is better)\n"); - fprintf(args->fp,"#DC\t[2]Query Sample\t[3]Genotyped Sample\t[4]Discordance\t[5]-log P(HWE)\t[6]Number of sites compared\n"); + args->kstr.l = 0; + ksprintf(&args->kstr,"INFO\tsites-compared\t%u\n",args->ncmp); + ksprintf(&args->kstr,"INFO\tsites-skipped-no-match\t%u\n",args->nskip_no_match); + ksprintf(&args->kstr,"INFO\tsites-skipped-multiallelic\t%u\n",args->nskip_not_ba); + ksprintf(&args->kstr,"INFO\tsites-skipped-monoallelic\t%u\n",args->nskip_mono); + ksprintf(&args->kstr,"INFO\tsites-skipped-no-data\t%u\n",args->nskip_no_data); + ksprintf(&args->kstr,"INFO\tsites-skipped-GT-not-diploid\t%u\n",args->nskip_dip_GT); + ksprintf(&args->kstr,"INFO\tsites-skipped-PL-not-diploid\t%u\n",args->nskip_dip_PL); + ksprintf(&args->kstr,"INFO\tsites-skipped-filtering-expression\t%u\n",args->nskip_filter); + ksprintf(&args->kstr,"INFO\tsites-used-PL-vs-PL\t%u\n",args->nused[0][0]); + ksprintf(&args->kstr,"INFO\tsites-used-PL-vs-GT\t%u\n",args->nused[0][1]); + ksprintf(&args->kstr,"INFO\tsites-used-GT-vs-PL\t%u\n",args->nused[1][0]); + ksprintf(&args->kstr,"INFO\tsites-used-GT-vs-GT\t%u\n",args->nused[1][1]); + ksprintf(&args->kstr,"# DCv2, discordance version 2:\n"); + ksprintf(&args->kstr,"# - Query sample\n"); + ksprintf(&args->kstr,"# - Genotyped sample\n"); + ksprintf(&args->kstr,"# - Discordance, given either as an abstract score or number of mismatches, see the options -E/-u\n" + "# in man page for details. Note that samples with high missingness have fewer sites compared,\n" + "# which results in lower overall discordance. 
Therefore it is advisable to use the average score\n" + "# per site rather than the absolute value, i.e. divide the value by the number of sites compared\n" + "# (smaller value = better match)\n"); + ksprintf(&args->kstr,"# - Average negative log of HWE probability at matching sites, attempts to quantify the following\n" + "# intuition: rare genotype matches are more informative than common genotype matches, hence two\n" + "# samples with similar discordance can be further stratified by the HWE score (bigger value = better\n" + "# match, the observed concordance was less likely to occur by chance)\n"); + ksprintf(&args->kstr,"# - Number of sites compared for this pair of samples (bigger = more informative)\n"); + ksprintf(&args->kstr,"# - Number of matching genotypes\n"); + ksprintf(&args->kstr,"#DCv2\t[2]Query Sample\t[3]Genotyped Sample\t[4]Discordance\t[5]Average -log P(HWE)\t[6]Number of sites compared\t[6]Number of matching genotypes\n"); + if ( bgzf_write(args->out_fh, args->kstr.s, args->kstr.l)!=args->kstr.l ) error("Failed to write to %s\n", args->output_fname); int trim = args->ntop; if ( !args->pairs ) @@ -824,26 +910,30 @@ static void report(args_t *args) int i; for (i=0; inpairs; i++) { + args->kstr.l = 0; int iqry = args->pairs[i].iqry; int igt = args->pairs[i].igt; if ( args->ndiff ) { - fprintf(args->fp,"DC\t%s\t%s\t%u\t%e\t%u\n", + ksprintf(&args->kstr,"DCv2\t%s\t%s\t%u\t%e\t%u\t%u\n", args->qry_hdr->samples[iqry], args->gt_hdr?args->gt_hdr->samples[igt]:args->qry_hdr->samples[igt], args->ndiff[i], - args->calc_hwe_prob ? args->hwe_prob[i] : 0, - args->ncnt[i]); + (args->calc_hwe_prob && args->nmatch[i]) ? args->hwe_prob[i]/args->nmatch[i] : 0, + args->ncnt[i], + args->nmatch[i]); } else { - fprintf(args->fp,"DC\t%s\t%s\t%e\t%e\t%u\n", + ksprintf(&args->kstr,"DCv2\t%s\t%s\t%e\t%e\t%u\t%u\n", args->qry_hdr->samples[iqry], args->gt_hdr?args->gt_hdr->samples[igt]:args->qry_hdr->samples[igt], args->pdiff[i], - args->calc_hwe_prob ? 
args->hwe_prob[i] : 0, - args->ncnt[i]); + (args->calc_hwe_prob && args->nmatch[i]) ? args->hwe_prob[i]/args->nmatch[i] : 0, + args->ncnt[i], + args->nmatch[i]); } + if ( bgzf_write(args->out_fh, args->kstr.s, args->kstr.l)!=args->kstr.l ) error("Failed to write to %s\n", args->output_fname); } } else if ( !trim ) @@ -855,25 +945,29 @@ static void report(args_t *args) int ngt = args->cross_check ? i : args->ngt_smpl; for (j=0; jkstr.l = 0; int igt = args->gt_smpl ? args->gt_smpl[j] : j; if ( args->ndiff ) { - fprintf(args->fp,"DC\t%s\t%s\t%u\t%e\t%u\n", + ksprintf(&args->kstr,"DCv2\t%s\t%s\t%u\t%e\t%u\t%u\n", args->qry_hdr->samples[iqry], args->gt_hdr?args->gt_hdr->samples[igt]:args->qry_hdr->samples[igt], args->ndiff[idx], - args->calc_hwe_prob ? args->hwe_prob[idx] : 0, - args->ncnt[idx]); + (args->calc_hwe_prob && args->nmatch[idx]) ? args->hwe_prob[idx]/args->nmatch[idx] : 0, + args->ncnt[idx], + args->nmatch[idx]); } else { - fprintf(args->fp,"DC\t%s\t%s\t%e\t%e\t%u\n", + ksprintf(&args->kstr,"DCv2\t%s\t%s\t%e\t%e\t%u\t%u\n", args->qry_hdr->samples[iqry], args->gt_hdr?args->gt_hdr->samples[igt]:args->qry_hdr->samples[igt], args->pdiff[idx], - args->calc_hwe_prob ? args->hwe_prob[idx] : 0, - args->ncnt[idx]); + (args->calc_hwe_prob && args->nmatch[idx]) ? args->hwe_prob[idx]/args->nmatch[idx] : 0, + args->ncnt[idx], + args->nmatch[idx]); } + if ( bgzf_write(args->out_fh, args->kstr.s, args->kstr.l)!=args->kstr.l ) error("Failed to write to %s\n", args->output_fname); idx++; } } @@ -888,7 +982,7 @@ static void report(args_t *args) for (j=0; jngt_smpl; j++) { if ( args->sort_by_hwe ) - arr[j].val = -args->hwe_prob[idx]; + arr[j].val = args->nmatch[idx] ? -args->hwe_prob[idx]/args->nmatch[idx] : 0; // -args->hwe_prob[idx]; else if ( args->ndiff ) arr[j].val = args->ncnt[idx] ? (double)args->ndiff[idx]/args->ncnt[idx] : 0; else @@ -901,26 +995,30 @@ static void report(args_t *args) int iqry = args->qry_smpl ? 
args->qry_smpl[i] : i; for (j=0; jntop; j++) { + args->kstr.l = 0; int idx = arr[j].idx; int igt = args->gt_smpl ? args->gt_smpl[arr[j].ism] : arr[j].ism; if ( args->ndiff ) { - fprintf(args->fp,"DC\t%s\t%s\t%u\t%e\t%u\n", + ksprintf(&args->kstr,"DCv2\t%s\t%s\t%u\t%e\t%u\t%u\n", args->qry_hdr->samples[iqry], args->gt_hdr?args->gt_hdr->samples[igt]:args->qry_hdr->samples[igt], args->ndiff[idx], - args->calc_hwe_prob ? args->hwe_prob[idx] : 0, - args->ncnt[idx]); + (args->calc_hwe_prob && args->nmatch[idx]) ? args->hwe_prob[idx]/args->nmatch[idx] : 0, + args->ncnt[idx], + args->nmatch[idx]); } else { - fprintf(args->fp,"DC\t%s\t%s\t%e\t%e\t%u\n", + ksprintf(&args->kstr,"DCv2\t%s\t%s\t%e\t%e\t%u\t%u\n", args->qry_hdr->samples[iqry], args->gt_hdr?args->gt_hdr->samples[igt]:args->qry_hdr->samples[igt], args->pdiff[idx], - args->calc_hwe_prob ? args->hwe_prob[idx] : 0, - args->ncnt[idx]); + (args->calc_hwe_prob && args->nmatch[idx]) ? args->hwe_prob[idx]/args->nmatch[idx] : 0, + args->ncnt[idx], + args->nmatch[idx]); } + if ( bgzf_write(args->out_fh, args->kstr.s, args->kstr.l)!=args->kstr.l ) error("Failed to write to %s\n", args->output_fname); } } free(arr); @@ -936,7 +1034,7 @@ static void report(args_t *args) for (j=0; jsort_by_hwe ) - arr[k].val = -args->hwe_prob[idx]; + arr[k].val = args->nmatch[idx] ? -args->hwe_prob[idx]/args->nmatch[idx] : 0; else if ( args->ndiff ) arr[k].val = args->ncnt[idx] ? (double)args->ndiff[idx]/args->ncnt[idx] : 0; else @@ -950,7 +1048,7 @@ static void report(args_t *args) { idx = j*(j+1)/2 + i; if ( args->sort_by_hwe ) - arr[k].val = -args->hwe_prob[idx]; + arr[k].val = args->nmatch[idx] ? -args->hwe_prob[idx]/args->nmatch[idx] : 0; else if ( args->ndiff ) arr[k].val = args->ncnt[idx] ? (double)args->ndiff[idx]/args->ncnt[idx] : 0; else @@ -963,27 +1061,31 @@ static void report(args_t *args) int iqry = args->qry_smpl ? 
args->qry_smpl[i] : i; for (j=0; jntop; j++) { + args->kstr.l = 0; if ( i <= arr[j].ism ) continue; int idx = arr[j].idx; int igt = args->qry_smpl ? args->qry_smpl[arr[j].ism] : arr[j].ism; if ( args->ndiff ) { - fprintf(args->fp,"DC\t%s\t%s\t%u\t%e\t%u\n", + ksprintf(&args->kstr,"DCv2\t%s\t%s\t%u\t%e\t%u\t%u\n", args->qry_hdr->samples[iqry], args->qry_hdr->samples[igt], args->ndiff[idx], - args->calc_hwe_prob ? args->hwe_prob[idx] : 0, - args->ncnt[idx]); + (args->calc_hwe_prob && args->nmatch[idx]) ? args->hwe_prob[idx]/args->nmatch[idx] : 0, + args->ncnt[idx], + args->nmatch[idx]); } else { - fprintf(args->fp,"DC\t%s\t%s\t%e\t%e\t%u\n", + ksprintf(&args->kstr,"DCv2\t%s\t%s\t%e\t%e\t%u\t%u\n", args->qry_hdr->samples[iqry], args->qry_hdr->samples[igt], args->pdiff[idx], - args->calc_hwe_prob ? args->hwe_prob[idx] : 0, - args->ncnt[idx]); + (args->calc_hwe_prob && args->nmatch[idx]) ? args->hwe_prob[idx]/args->nmatch[idx] : 0, + args->ncnt[idx], + args->nmatch[idx]); } + if ( bgzf_write(args->out_fh, args->kstr.s, args->kstr.l)!=args->kstr.l ) error("Failed to write to %s\n", args->output_fname); } } free(arr); @@ -1053,12 +1155,16 @@ static void usage(void) fprintf(stderr, " and TMP is a prefix of temporary files used by external sorting [/tmp/bcftools.XXXXXX]\n"); #endif fprintf(stderr, " --dry-run Stop after first record to estimate required time\n"); - fprintf(stderr, " -e, --error-probability INT Phred-scaled probability of genotyping error, 0 for faster but less accurate results [40]\n"); + fprintf(stderr, " -E, --error-probability INT Phred-scaled probability of genotyping error, 0 for faster but less accurate results [40]\n"); + fprintf(stderr, " -e, --exclude [qry|gt]:EXPR Exclude sites for which the expression is true\n"); fprintf(stderr, " -g, --genotypes FILE Genotypes to compare against\n"); fprintf(stderr, " -H, --homs-only Homozygous genotypes only, useful with low coverage data (requires -g)\n"); + fprintf(stderr, " -i, --include [qry|gt]:EXPR 
Include sites for which the expression is true\n"); fprintf(stderr, " --n-matches INT Print only top INT matches for each sample (sorted by average score), 0 for unlimited.\n"); fprintf(stderr, " Use negative value to sort by HWE probability rather than by discordance [0]\n"); fprintf(stderr, " --no-HWE-prob Disable calculation of HWE probability\n"); + fprintf(stderr, " -o, --output FILE Write output to a file [standard output]\n"); + fprintf(stderr, " -O, --output-type t|z t: plain tab-delimited text output, z: compressed [t]\n"); fprintf(stderr, " -p, --pairs LIST Comma-separated sample pairs to compare (qry,gt[,qry,gt..] with -g or qry,qry[,qry,qry..] w/o)\n"); fprintf(stderr, " -P, --pairs-file FILE File with tab-delimited sample pairs to compare (qry,gt with -g or qry,qry w/o)\n"); fprintf(stderr, " -r, --regions REGION Restrict to comma-separated list of regions\n"); @@ -1071,10 +1177,10 @@ static void usage(void) fprintf(stderr, " --targets-overlap 0|1|2 Include if POS in the region (0), record overlaps (1), variant overlaps (2) [0]\n"); fprintf(stderr, " -u, --use TAG1[,TAG2] Which tag to use in the query file (TAG1) and the -g file (TAG2) [PL,GT]\n"); fprintf(stderr, "Examples:\n"); - fprintf(stderr, " # Check discordance of all samples from B against all sample in A\n"); + fprintf(stderr, " # Check discordance of all samples from B against all samples in A\n"); fprintf(stderr, " bcftools gtcheck -g A.bcf B.bcf\n"); fprintf(stderr, "\n"); - fprintf(stderr, " # Limit comparisons to the fiven list of samples\n"); + fprintf(stderr, " # Limit comparisons to the given list of samples\n"); fprintf(stderr, " bcftools gtcheck -s gt:a1,a2,a3 -s qry:b1,b2 -g A.bcf B.bcf\n"); fprintf(stderr, "\n"); fprintf(stderr, " # Compare only two pairs a1,b1 and a1,b2\n"); @@ -1094,6 +1200,7 @@ int main_vcfgtcheck(int argc, char *argv[]) args->use_PLs = 40; args->regions_overlap = 1; args->targets_overlap = 0; + args->output_fname = "-"; // external sort for --distinctive-sites 
#ifdef _WIN32 @@ -1112,7 +1219,11 @@ int main_vcfgtcheck(int argc, char *argv[]) static struct option loptions[] = { - {"error-probability",1,0,'e'}, + {"error-probability",1,0,'E'}, // note this used to be 'e', but can easily auto-detect to assure backward compatibility + {"exclude",required_argument,0,'e'}, + {"include",required_argument,0,'i'}, + {"output",required_argument,0,'o'}, + {"output-type",required_argument,NULL,'O'}, {"use",1,0,'u'}, {"cluster",1,0,'c'}, {"GTs-only",1,0,'G'}, @@ -1139,9 +1250,78 @@ int main_vcfgtcheck(int argc, char *argv[]) {0,0,0,0} }; char *tmp; - while ((c = getopt_long(argc, argv, "hg:p:s:S:p:P:Hr:R:at:T:G:c:u:e:",loptions,NULL)) >= 0) { + while ((c = getopt_long(argc, argv, "hg:p:s:S:p:P:Hr:R:at:T:G:c:u:e:E:i:o:O:",loptions,NULL)) >= 0) { switch (c) { + case 'o': args->output_fname = optarg; break; + case 'O': + switch (optarg[0]) { + case 't': args->output_type = FT_TAB_TEXT; break; + case 'z': args->output_type = FT_VCF_GZ; break; + default: + { + args->clevel = strtol(optarg,&tmp,10); + if ( *tmp || args->clevel<0 || args->clevel>9 ) error("The output type \"%s\" not recognised\n", optarg); + } + } + if ( optarg[1] ) + { + args->clevel = strtol(optarg+1,&tmp,10); + if ( *tmp || args->clevel<0 || args->clevel>9 ) error("Could not parse argument: --output-type %s\n", optarg+1); + } + break; case 'e': + if ( !strncasecmp("gt:",optarg,3) ) + { + if ( args->gt_filter_str ) error("Error: only one -i or -e expression can be given for gt:, and they cannot be combined\n"); + args->gt_filter_str = optarg; + args->gt_filter_logic |= FLT_EXCLUDE; + } + else if ( !strncasecmp("qry:",optarg,4) ) + { + if ( args->qry_filter_str ) error("Error: only one -i or -e expression can be given for qry:, and they cannot be combined\n"); + args->qry_filter_str = optarg; + args->qry_filter_logic |= FLT_EXCLUDE; + } + else + { + // this could be the old -e, --error-probability option + args->use_PLs = strtol(optarg,&tmp,10); + if ( !tmp || *tmp ) + { + 
// it is not + args->gt_filter_str = optarg; + args->qry_filter_str = optarg; + args->gt_filter_logic |= FLT_EXCLUDE; + args->qry_filter_logic |= FLT_EXCLUDE; + } + else + { + fprintf(stderr,"[warning] auto-detected the old format --error-probability option, please switch from -e to -E.\n"); + } + } + break; + case 'i': + if ( !strncasecmp("gt:",optarg,3) ) + { + if ( args->gt_filter_str ) error("Error: only one -i or -e expression can be given for gt:, and they cannot be combined\n"); + args->gt_filter_str = optarg; + args->gt_filter_logic |= FLT_INCLUDE; + } + else if ( !strncasecmp("qry:",optarg,4) ) + { + if ( args->qry_filter_str ) error("Error: only one -i or -e expression can be given for qry:, and they cannot be combined\n"); + args->qry_filter_str = optarg; + args->qry_filter_logic |= FLT_INCLUDE; + } + else + { + args->gt_filter_str = optarg; + args->qry_filter_str = optarg; + args->gt_filter_logic |= FLT_INCLUDE; + args->qry_filter_logic |= FLT_INCLUDE; + } + break; + case 'E': args->use_PLs = strtol(optarg,&tmp,10); if ( !tmp || *tmp ) error("Could not parse: --error-probability %s\n", optarg); break; @@ -1267,7 +1447,9 @@ int main_vcfgtcheck(int argc, char *argv[]) gettimeofday(&t1, NULL); double delta = (t1.tv_sec - t0.tv_sec) * 1e6 + (t1.tv_usec - t0.tv_usec); fprintf(stderr,"INFO:\tTime required to process one record .. %f seconds\n",delta/1e6); - fprintf(args->fp,"INFO\tTime required to process one record .. %f seconds\n",delta/1e6); + args->kstr.l = 0; + ksprintf(&args->kstr,"INFO\tTime required to process one record .. %f seconds\n",delta/1e6); + if ( bgzf_write(args->out_fh, args->kstr.s, args->kstr.l)!=args->kstr.l ) error("Failed to write to %s\n", args->output_fname); if ( args->dry_run ) break; } } diff --git a/vcfhead.c b/vcfhead.c index 20be2a947..0b0222b52 100644 --- a/vcfhead.c +++ b/vcfhead.c @@ -1,6 +1,7 @@ /* vcfhead.c -- view VCF/BCF file headers. Copyright (C) 2021 University of Glasgow. + Copyright (C) 2023 Genome Research Ltd. 
Author: John Marshall @@ -41,30 +42,36 @@ int main_vcfhead(int argc, char *argv[]) "Usage: bcftools head [OPTION]... [FILE]\n" "\n" "Options:\n" -" -h, --headers INT Display INT header lines [all]\n" -" -n, --records INT Display INT variant record lines [none]\n" +" -h, --headers INT Display INT header lines [all]\n" +" -n, --records INT Display INT variant record lines [none]\n" +" -s, --samples INT Display INT records starting with the #CHROM header line [none]\n" "\n"; static const struct option loptions[] = { { "headers", required_argument, NULL, 'h' }, { "records", required_argument, NULL, 'n' }, + { "samples", required_argument, NULL, 's' }, { NULL, 0, NULL, 0 } }; int all_headers = 1; + int samples = 0; uint64_t nheaders = 0; uint64_t nrecords = 0; int c, nargs; - while ((c = getopt_long(argc, argv, "h:n:", loptions, NULL)) >= 0) + while ((c = getopt_long(argc, argv, "h:n:s:", loptions, NULL)) >= 0) switch (c) { case 'h': all_headers = 0; nheaders = strtoull(optarg, NULL, 0); break; case 'n': nrecords = strtoull(optarg, NULL, 0); break; + case 's': nrecords = strtoull(optarg, NULL, 0); samples = 1; break; default: fputs(usage, stderr); return EXIT_FAILURE; } + if ( samples && all_headers ) all_headers = 0; + nargs = argc - optind; if (nargs == 0 && isatty(STDIN_FILENO)) { fputs(usage, stdout); @@ -99,17 +106,34 @@ int main_vcfhead(int argc, char *argv[]) bcf_hdr_format(hdr, 0, &str); fputs(ks_str(&str), stdout); } - else if (nheaders > 0) { + else if (nheaders > 0 || samples ) { bcf_hdr_format(hdr, 0, &str); char *lim = str.s; uint64_t n; + int samples_printed = 0; for (n = 0; n < nheaders; n++) { + if ( samples && !strncmp(lim,"#CHROM\t",7) ) samples_printed = 1; lim = strchr(lim, '\n'); if (lim) lim++; else break; } - if (lim) *lim = '\0'; - fputs(ks_str(&str), stdout); + if ( nheaders ) + { + char tmp; + if (lim) { tmp = *lim; *lim = '\0'; } + fputs(ks_str(&str), stdout); + if (lim) *lim = tmp; + } + if ( lim && samples && !samples_printed ) + { + while ( 
lim && *lim ) + { + if ( !strncmp(lim,"#CHROM\t",7) ) { fputs(lim, stdout); break; } + lim = strchr(lim, '\n'); + if (lim) lim++; + else break; + } + } } if (nrecords > 0) { diff --git a/vcfisec.c b/vcfisec.c index 4ee29b4c8..3a70cf310 100644 --- a/vcfisec.c +++ b/vcfisec.c @@ -598,7 +598,10 @@ int main_vcfisec(int argc, char *argv[]) case 't': args->targets_list = optarg; break; case 'T': args->targets_list = optarg; targets_is_file = 1; break; case 'p': args->prefix = optarg; break; - case 'w': args->write_files = optarg; break; + case 'w': + if ( args->write_files ) error("The option -w accepts a list of indices and can be given only once\n"); + args->write_files = optarg; + break; case 'i': add_filter(args, optarg, FLT_INCLUDE); break; case 'e': add_filter(args, optarg, FLT_EXCLUDE); break; case 'n': diff --git a/vcfmerge.c b/vcfmerge.c index 87b6b8a39..93aee50b0 100644 --- a/vcfmerge.c +++ b/vcfmerge.c @@ -34,6 +34,8 @@ THE SOFTWARE. */ #include #include #include +#include +#include #include #include #include @@ -192,6 +194,7 @@ typedef struct int keep_AC_AN; char *index_fn; int write_index; + int trim_star_allele; // 0=don't trim; 1=trim at variant sites; 2=trim at all sites } args_t; @@ -437,6 +440,11 @@ static void info_rules_init(args_t *args) if ( str.l ) kputc(',',&str); kputs("QS:sum",&str); } + if ( args->do_gvcf && bcf_hdr_idinfo_exists(args->out_hdr,BCF_HL_INFO,bcf_hdr_id2int(args->out_hdr, BCF_DT_ID, "MIN_DP")) ) + { + if ( str.l ) kputc(',',&str); + kputs("MIN_DP:min",&str); + } if ( args->do_gvcf && bcf_hdr_idinfo_exists(args->out_hdr,BCF_HL_INFO,bcf_hdr_id2int(args->out_hdr, BCF_DT_ID, "MinDP")) ) { if ( str.l ) kputc(',',&str); @@ -1272,32 +1280,32 @@ static void merge_AGR_info_tag(bcf_hdr_t *hdr, bcf1_t *line, bcf_info_t *info, i if ( len==BCF_VL_A || len==BCF_VL_R ) { int ifrom = len==BCF_VL_A ? 
1 : 0; - #define BRANCH(type_t, is_missing, is_vector_end, out_type_t) { \ - type_t *src = (type_t *) info->vptr; \ + #define BRANCH(type_t, convert, is_missing, is_vector_end, out_type_t) { \ + uint8_t *src = info->vptr; \ out_type_t *tgt = (out_type_t *) agr->buf; \ int iori, inew; \ - for (iori=ifrom; iorin_allele; iori++) \ + for (iori=ifrom; iorin_allele; iori++, src += sizeof(type_t)) \ { \ + type_t val = convert(src); \ if ( is_vector_end ) break; \ if ( is_missing ) continue; \ inew = als->map[iori] - ifrom; \ - tgt[inew] = *src; \ - src++; \ + tgt[inew] = val; \ } \ } switch (info->type) { - case BCF_BT_INT8: BRANCH(int8_t, *src==bcf_int8_missing, *src==bcf_int8_vector_end, int); break; - case BCF_BT_INT16: BRANCH(int16_t, *src==bcf_int16_missing, *src==bcf_int16_vector_end, int); break; - case BCF_BT_INT32: BRANCH(int32_t, *src==bcf_int32_missing, *src==bcf_int32_vector_end, int); break; - case BCF_BT_FLOAT: BRANCH(float, bcf_float_is_missing(*src), bcf_float_is_vector_end(*src), float); break; + case BCF_BT_INT8: BRANCH(int8_t, le_to_i8, val==bcf_int8_missing, val==bcf_int8_vector_end, int); break; + case BCF_BT_INT16: BRANCH(int16_t, le_to_i16, val==bcf_int16_missing, val==bcf_int16_vector_end, int); break; + case BCF_BT_INT32: BRANCH(int32_t, le_to_i32, val==bcf_int32_missing, val==bcf_int32_vector_end, int); break; + case BCF_BT_FLOAT: BRANCH(float, le_to_float, bcf_float_is_missing(val), bcf_float_is_vector_end(val), float); break; default: fprintf(stderr,"TODO: %s:%d .. 
info->type=%d\n", __FILE__,__LINE__, info->type); exit(1); } #undef BRANCH } else { - #define BRANCH(type_t, is_missing, is_vector_end, out_type_t) { \ - type_t *src = (type_t *) info->vptr; \ + #define BRANCH(type_t, convert, is_missing, is_vector_end, out_type_t) { \ + uint8_t *src = info->vptr; \ out_type_t *tgt = (out_type_t *) agr->buf; \ int iori,jori, inew,jnew; \ for (iori=0; iorin_allele; iori++) \ @@ -1307,19 +1315,20 @@ static void merge_AGR_info_tag(bcf_hdr_t *hdr, bcf1_t *line, bcf_info_t *info, i { \ jnew = als->map[jori]; \ int kori = iori*(iori+1)/2 + jori; \ + type_t val = convert(&src[kori * sizeof(type_t)]); \ if ( is_vector_end ) break; \ if ( is_missing ) continue; \ int knew = inew>jnew ? inew*(inew+1)/2 + jnew : jnew*(jnew+1)/2 + inew; \ - tgt[knew] = src[kori]; \ + tgt[knew] = val; \ } \ if ( jori<=iori ) break; \ } \ } switch (info->type) { - case BCF_BT_INT8: BRANCH(int8_t, src[kori]==bcf_int8_missing, src[kori]==bcf_int8_vector_end, int); break; - case BCF_BT_INT16: BRANCH(int16_t, src[kori]==bcf_int16_missing, src[kori]==bcf_int16_vector_end, int); break; - case BCF_BT_INT32: BRANCH(int32_t, src[kori]==bcf_int32_missing, src[kori]==bcf_int32_vector_end, int); break; - case BCF_BT_FLOAT: BRANCH(float, bcf_float_is_missing(src[kori]), bcf_float_is_vector_end(src[kori]), float); break; + case BCF_BT_INT8: BRANCH(int8_t, le_to_i8, val==bcf_int8_missing, val==bcf_int8_vector_end, int); break; + case BCF_BT_INT16: BRANCH(int16_t, le_to_i16, val==bcf_int16_missing, val==bcf_int16_vector_end, int); break; + case BCF_BT_INT32: BRANCH(int32_t, le_to_i32, val==bcf_int32_missing, val==bcf_int32_vector_end, int); break; + case BCF_BT_FLOAT: BRANCH(float, le_to_float, bcf_float_is_missing(val), bcf_float_is_vector_end(val), float); break; default: fprintf(stderr,"TODO: %s:%d .. 
info->type=%d\n", __FILE__,__LINE__, info->type); exit(1); } #undef BRANCH @@ -1488,12 +1497,12 @@ static inline int max_used_gt_ploidy(bcf_fmt_t *fmt, int nsmpl) { int i,j, max_ploidy = 0; - #define BRANCH(type_t, vector_end) { \ - type_t *ptr = (type_t*) fmt->p; \ + #define BRANCH(type_t, convert, vector_end) { \ + uint8_t *ptr = fmt->p; \ for (i=0; in; j++) \ - if ( ptr[j]==vector_end ) break; \ + if ( convert(&ptr[j * sizeof(type_t)])==vector_end ) break; \ if ( j==fmt->n ) \ { \ /* all fields were used */ \ @@ -1501,14 +1510,14 @@ static inline int max_used_gt_ploidy(bcf_fmt_t *fmt, int nsmpl) break; \ } \ if ( max_ploidy < j ) max_ploidy = j; \ - ptr += fmt->n; \ + ptr += fmt->n * sizeof(type_t); \ } \ } switch (fmt->type) { - case BCF_BT_INT8: BRANCH(int8_t, bcf_int8_vector_end); break; - case BCF_BT_INT16: BRANCH(int16_t, bcf_int16_vector_end); break; - case BCF_BT_INT32: BRANCH(int32_t, bcf_int32_vector_end); break; + case BCF_BT_INT8: BRANCH(int8_t, le_to_i8, bcf_int8_vector_end); break; + case BCF_BT_INT16: BRANCH(int16_t, le_to_i16, bcf_int16_vector_end); break; + case BCF_BT_INT32: BRANCH(int32_t, le_to_i32, bcf_int32_vector_end); break; default: error("Unexpected case: %d\n", fmt->type); } #undef BRANCH @@ -1598,19 +1607,22 @@ void init_local_alleles(args_t *args, bcf1_t *out, int ifmt_PL) int *map = ma->buf[i].rec[ma->buf[i].cur].map; double *allele_prob = ma->tmpd; int *idx = ma->tmpi; - #define BRANCH(src_type_t, src_is_missing, src_is_vector_end, pl2prob_idx) { \ - src_type_t *src = (src_type_t*) fmt_ori->p; \ + #define BRANCH(src_type_t, convert, src_is_missing, src_is_vector_end, pl2prob_idx) { \ + uint8_t *src = fmt_ori->p; \ for (j=0; jn_allele; k++) allele_prob[k] = 0; \ for (k=0; kn_allele; k++) \ for (l=0; l<=k; l++) \ { \ - if ( src_is_missing || src_is_vector_end ) { src++; continue; } \ - double prob = ma->pl2prob[pl2prob_idx]; \ - allele_prob[k] += prob; \ - allele_prob[l] += prob; \ - src++; \ + src_type_t val = convert(src); \ + if ( 
!(src_is_missing) && !(src_is_vector_end) ) \ + { \ + double prob = ma->pl2prob[pl2prob_idx]; \ + allele_prob[k] += prob; \ + allele_prob[l] += prob; \ + } \ + src += sizeof(src_type_t); \ } \ /* insertion sort by allele probability, descending order, with the twist that REF (idx=0) always comes first */ \ allele_prob++; idx[0] = -1; idx++; /* keep REF first */ \ @@ -1637,9 +1649,9 @@ void init_local_alleles(args_t *args, bcf1_t *out, int ifmt_PL) } switch (fmt_ori->type) { - case BCF_BT_INT8: BRANCH( int8_t, *src==bcf_int8_missing, *src==bcf_int8_vector_end, *src); break; - case BCF_BT_INT16: BRANCH(int16_t, *src==bcf_int16_missing, *src==bcf_int16_vector_end, *src>=0 && *src=0 && *src=0 && val=0 && valtype); } #undef BRANCH @@ -1735,8 +1747,8 @@ void merge_GT(args_t *args, bcf_fmt_t **fmt_map, bcf1_t *out) continue; } - #define BRANCH(type_t, vector_end) { \ - type_t *p_ori = (type_t*) fmt_ori->p; \ + #define BRANCH(type_t, convert, vector_end) { \ + uint8_t *p_ori = fmt_ori->p; \ if ( !ma->buf[i].rec[irec].als_differ ) \ { \ /* the allele numbering is unchanged */ \ @@ -1744,14 +1756,15 @@ void merge_GT(args_t *args, bcf_fmt_t **fmt_map, bcf1_t *out) { \ for (k=0; kn; k++) \ { \ - if ( p_ori[k]==vector_end ) break; /* smaller ploidy */ \ + type_t val = convert(&p_ori[k * sizeof(type_t)]); \ + if ( val==vector_end ) break; /* smaller ploidy */ \ ma->smpl_ploidy[ismpl+j]++; \ - if ( bcf_gt_is_missing(p_ori[k]) ) tmp[k] = 0; /* missing allele */ \ - else tmp[k] = p_ori[k]; \ + if ( bcf_gt_is_missing(val) ) tmp[k] = 0; /* missing allele */ \ + else tmp[k] = val; \ } \ for (; kn; \ + p_ori += fmt_ori->n * sizeof(type_t); \ } \ ismpl += bcf_hdr_nsamples(hdr); \ continue; \ @@ -1761,27 +1774,28 @@ void merge_GT(args_t *args, bcf_fmt_t **fmt_map, bcf1_t *out) { \ for (k=0; kn; k++) \ { \ - if ( p_ori[k]==vector_end ) break; /* smaller ploidy */ \ + type_t val = convert(&p_ori[k * sizeof(type_t)]); \ + if ( val==vector_end ) break; /* smaller ploidy */ \ 
ma->smpl_ploidy[ismpl+j]++; \ - if ( bcf_gt_is_missing(p_ori[k]) ) tmp[k] = 0; /* missing allele */ \ + if ( bcf_gt_is_missing(val) ) tmp[k] = 0; /* missing allele */ \ else \ { \ - int al = (p_ori[k]>>1) - 1; \ + int al = (val>>1) - 1; \ al = al<=0 ? al + 1 : ma->buf[i].rec[irec].map[al] + 1; \ - tmp[k] = (al << 1) | ((p_ori[k])&1); \ + tmp[k] = (al << 1) | ((val)&1); \ } \ } \ for (; kn; \ + p_ori += fmt_ori->n * sizeof(type_t); \ } \ ismpl += bcf_hdr_nsamples(hdr); \ } switch (fmt_ori->type) { - case BCF_BT_INT8: BRANCH(int8_t, bcf_int8_vector_end); break; - case BCF_BT_INT16: BRANCH(int16_t, bcf_int16_vector_end); break; - case BCF_BT_INT32: BRANCH(int32_t, bcf_int32_vector_end); break; + case BCF_BT_INT8: BRANCH(int8_t, le_to_i8, bcf_int8_vector_end); break; + case BCF_BT_INT16: BRANCH(int16_t, le_to_i16, bcf_int16_vector_end); break; + case BCF_BT_INT32: BRANCH(int32_t, le_to_i32, bcf_int32_vector_end); break; default: error("Unexpected case: %d\n", fmt_ori->type); } #undef BRANCH @@ -1959,10 +1973,10 @@ void merge_localized_numberG_format_field(args_t *args, bcf_fmt_t **fmt_map, bcf if ( 2*fmt_ori->n!=line->n_allele*(line->n_allele+1) ) error("Todo: localization of missing or haploid Number=G tags\n"); // localize - #define BRANCH(tgt_type_t, src_type_t, src_is_missing, src_is_vector_end, tgt_set_missing, tgt_set_vector_end) { \ + #define BRANCH(tgt_type_t, src_type_t, convert, src_is_missing, src_is_vector_end, tgt_set_missing, tgt_set_vector_end) { \ for (j=0; jp + j*fmt_ori->n; \ + uint8_t *src = fmt_ori->p + sizeof(src_type_t)*j*fmt_ori->n; \ tgt_type_t *tgt = (tgt_type_t *) ma->tmp_arr + ismpl*nsize; \ int *laa = ma->laa + (1+args->local_alleles)*ismpl; \ int ii,ij,tgt_idx = 0; \ @@ -1972,9 +1986,10 @@ void merge_localized_numberG_format_field(args_t *args, bcf_fmt_t **fmt_map, bcf for (ij=0; ij<=ii; ij++) \ { \ int src_idx = bcf_alleles2gt(laa[ii],laa[ij]); \ + src_type_t val = convert(&src[src_idx * sizeof(src_type_t)]); \ if ( src_is_missing ) 
tgt_set_missing; \ else if ( src_is_vector_end ) break; \ - else tgt[tgt_idx] = src[src_idx]; \ + else tgt[tgt_idx] = val; \ tgt_idx++; \ } \ } \ @@ -1985,10 +2000,10 @@ void merge_localized_numberG_format_field(args_t *args, bcf_fmt_t **fmt_map, bcf } switch (fmt_ori->type) { - case BCF_BT_INT8: BRANCH(int32_t, int8_t, src[src_idx]==bcf_int8_missing, src[src_idx]==bcf_int8_vector_end, tgt[tgt_idx]=bcf_int32_missing, tgt[tgt_idx]=bcf_int32_vector_end); break; - case BCF_BT_INT16: BRANCH(int32_t, int16_t, src[src_idx]==bcf_int16_missing, src[src_idx]==bcf_int16_vector_end, tgt[tgt_idx]=bcf_int32_missing, tgt[tgt_idx]=bcf_int32_vector_end); break; - case BCF_BT_INT32: BRANCH(int32_t, int32_t, src[src_idx]==bcf_int32_missing, src[src_idx]==bcf_int32_vector_end, tgt[tgt_idx]=bcf_int32_missing, tgt[tgt_idx]=bcf_int32_vector_end); break; - case BCF_BT_FLOAT: BRANCH(float, float, bcf_float_is_missing(src[src_idx]), bcf_float_is_vector_end(src[src_idx]), bcf_float_set_missing(tgt[tgt_idx]), bcf_float_set_vector_end(tgt[tgt_idx])); break; + case BCF_BT_INT8: BRANCH(int32_t, int8_t, le_to_i8, val==bcf_int8_missing, val==bcf_int8_vector_end, tgt[tgt_idx]=bcf_int32_missing, tgt[tgt_idx]=bcf_int32_vector_end); break; + case BCF_BT_INT16: BRANCH(int32_t, int16_t, le_to_i16, val==bcf_int16_missing, val==bcf_int16_vector_end, tgt[tgt_idx]=bcf_int32_missing, tgt[tgt_idx]=bcf_int32_vector_end); break; + case BCF_BT_INT32: BRANCH(int32_t, int32_t, le_to_i32, val==bcf_int32_missing, val==bcf_int32_vector_end, tgt[tgt_idx]=bcf_int32_missing, tgt[tgt_idx]=bcf_int32_vector_end); break; /* int32_t source: the converter must be le_to_i32 to match the sizeof(src_type_t) stride */ + case BCF_BT_FLOAT: BRANCH(float, float, le_to_float, bcf_float_is_missing(val), bcf_float_is_vector_end(val), bcf_float_set_missing(tgt[tgt_idx]), bcf_float_set_vector_end(tgt[tgt_idx])); break; default: error("Unexpected case: %d, %s\n", fmt_ori->type, key); } #undef BRANCH @@ -2058,10 +2073,10 @@ void merge_localized_numberAR_format_field(args_t *args, bcf_fmt_t **fmt_map, bc } // localize - #define
BRANCH(tgt_type_t, src_type_t, src_is_missing, src_is_vector_end, tgt_set_missing, tgt_set_vector_end) { \ + #define BRANCH(tgt_type_t, src_type_t, convert, src_is_missing, src_is_vector_end, tgt_set_missing, tgt_set_vector_end) { \ for (j=0; jp + j*fmt_ori->n; \ + uint8_t *src = fmt_ori->p + sizeof(src_type_t)*j*fmt_ori->n; \ tgt_type_t *tgt = (tgt_type_t *) ma->tmp_arr + ismpl*nsize; \ int *laa = ma->laa + (1+args->local_alleles)*ismpl; \ int ii,tgt_idx = 0; \ @@ -2069,9 +2084,10 @@ void merge_localized_numberAR_format_field(args_t *args, bcf_fmt_t **fmt_map, bc { \ if ( laa[ii]==bcf_int32_missing || laa[ii]==bcf_int32_vector_end ) break; \ int src_idx = laa[ii] - ibeg; \ + src_type_t val = convert(&src[src_idx * sizeof(src_type_t)]); \ if ( src_is_missing ) tgt_set_missing; \ else if ( src_is_vector_end ) break; \ - else tgt[tgt_idx] = src[src_idx]; \ + else tgt[tgt_idx] = val; \ tgt_idx++; \ } \ if ( !tgt_idx ) { tgt_set_missing; tgt_idx++; } \ @@ -2081,10 +2097,10 @@ void merge_localized_numberAR_format_field(args_t *args, bcf_fmt_t **fmt_map, bc } switch (fmt_ori->type) { - case BCF_BT_INT8: BRANCH(int32_t, int8_t, src[src_idx]==bcf_int8_missing, src[src_idx]==bcf_int8_vector_end, tgt[tgt_idx]=bcf_int32_missing, tgt[tgt_idx]=bcf_int32_vector_end); break; - case BCF_BT_INT16: BRANCH(int32_t, int16_t, src[src_idx]==bcf_int16_missing, src[src_idx]==bcf_int16_vector_end, tgt[tgt_idx]=bcf_int32_missing, tgt[tgt_idx]=bcf_int32_vector_end); break; - case BCF_BT_INT32: BRANCH(int32_t, int32_t, src[src_idx]==bcf_int32_missing, src[src_idx]==bcf_int32_vector_end, tgt[tgt_idx]=bcf_int32_missing, tgt[tgt_idx]=bcf_int32_vector_end); break; - case BCF_BT_FLOAT: BRANCH(float, float, bcf_float_is_missing(src[src_idx]), bcf_float_is_vector_end(src[src_idx]), bcf_float_set_missing(tgt[tgt_idx]), bcf_float_set_vector_end(tgt[tgt_idx])); break; + case BCF_BT_INT8: BRANCH(int32_t, int8_t, le_to_i8, val==bcf_int8_missing, val==bcf_int8_vector_end, tgt[tgt_idx]=bcf_int32_missing, 
tgt[tgt_idx]=bcf_int32_vector_end); break; + case BCF_BT_INT16: BRANCH(int32_t, int16_t, le_to_i16, val==bcf_int16_missing, val==bcf_int16_vector_end, tgt[tgt_idx]=bcf_int32_missing, tgt[tgt_idx]=bcf_int32_vector_end); break; + case BCF_BT_INT32: BRANCH(int32_t, int32_t, le_to_i32, val==bcf_int32_missing, val==bcf_int32_vector_end, tgt[tgt_idx]=bcf_int32_missing, tgt[tgt_idx]=bcf_int32_vector_end); break; + case BCF_BT_FLOAT: BRANCH(float, float, le_to_float, bcf_float_is_missing(val), bcf_float_is_vector_end(val), bcf_float_set_missing(tgt[tgt_idx]), bcf_float_set_vector_end(tgt[tgt_idx])); break; default: error("Unexpected case: %d, %s\n", fmt_ori->type, key); } #undef BRANCH @@ -2201,7 +2217,7 @@ void merge_format_field(args_t *args, bcf_fmt_t **fmt_map, missing_rule_t *mrule } // set the values - #define BRANCH(tgt_type_t, src_type_t, src_is_missing, src_is_vector_end, tgt_set_missing, tgt_set_vector_end) { \ + #define BRANCH(tgt_type_t, src_type_t, convert, src_is_missing, src_is_vector_end, tgt_set_missing, tgt_set_vector_end) { \ int j, l, k; \ tgt_type_t *tgt = (tgt_type_t *) ma->tmp_arr + ismpl*nsize; \ if ( !fmt_ori ) \ @@ -2214,7 +2230,7 @@ void merge_format_field(args_t *args, bcf_fmt_t **fmt_map, missing_rule_t *mrule ismpl += bcf_hdr_nsamples(hdr); \ continue; \ } \ - src_type_t *src = (src_type_t*) fmt_ori->p; \ + uint8_t *src = fmt_ori->p; \ if ( (length!=BCF_VL_G && length!=BCF_VL_A && length!=BCF_VL_R) || (line->n_allele==out->n_allele && !ma->buf[i].rec[irec].als_differ) ) \ { \ /* alleles unchanged, copy over */ \ @@ -2224,11 +2240,11 @@ void merge_format_field(args_t *args, bcf_fmt_t **fmt_map, missing_rule_t *mrule { \ if ( src_is_vector_end ) break; \ else if ( src_is_missing ) tgt_set_missing; \ - else *tgt = *src; \ - tgt++; src++; \ + else *tgt = convert(src); \ + tgt++; src += sizeof(src_type_t); \ } \ for (k=l; kn - l; \ + src += sizeof(src_type_t) * (fmt_ori->n - l); \ } \ ismpl += bcf_hdr_nsamples(hdr); \ continue; \ @@ -2240,8 
+2256,14 @@ void merge_format_field(args_t *args, bcf_fmt_t **fmt_map, missing_rule_t *mrule for (j=0; jtmp_arr + (ismpl+j)*nsize; \ - src = (src_type_t*) fmt_ori->p + j*fmt_ori->n; \ - if ( (src_is_missing && fmt_ori->n==1) || (++src && src_is_vector_end) ) \ + src = fmt_ori->p + sizeof(src_type_t) * j * fmt_ori->n; \ + int tag_missing = src_is_missing && fmt_ori->n==1; \ + if (!tag_missing) \ + { \ + src += sizeof(src_type_t); \ + tag_missing = src_is_vector_end ; \ + } \ + if ( tag_missing ) \ { \ /* tag with missing value "." */ \ tgt_set_missing; \ @@ -2252,9 +2274,10 @@ void merge_format_field(args_t *args, bcf_fmt_t **fmt_map, missing_rule_t *mrule int ngsize = haploid ? out->n_allele : out->n_allele*(out->n_allele + 1)/2; \ if ( ma->buf[i].unkn_allele ) /* Use value from the unknown allele when available */ \ { \ - src = (src_type_t*) fmt_ori->p + j*fmt_ori->n; \ + src = fmt_ori->p + sizeof(src_type_t)*j*fmt_ori->n; \ int iunkn = haploid ? ma->buf[i].unkn_allele : (ma->buf[i].unkn_allele+1)*(ma->buf[i].unkn_allele + 2)/2 - 1; \ - for (l=0; ltype==MERGE_MISSING_CONST ) \ { \ @@ -2262,9 +2285,13 @@ void merge_format_field(args_t *args, bcf_fmt_t **fmt_map, missing_rule_t *mrule } \ else if ( mrule && mrule->type==MERGE_MISSING_MAX ) \ { \ - src = (src_type_t*) fmt_ori->p + j*fmt_ori->n; \ - src_type_t max = src[0]; \ - for (l=1; ln; l++) if ( max < src[l] ) max = src[l]; \ + src = fmt_ori->p + sizeof(src_type_t)*j*fmt_ori->n; \ + src_type_t max = convert(src); \ + for (l=1; ln; l++) \ + { \ + src_type_t val = convert(&src[l * sizeof(src_type_t)]); \ + if ( max < val ) max = val; \ + } \ for (l=0; ln_allele; iori++) \ { \ inew = ma->buf[i].rec[irec].map[iori]; \ - src = (src_type_t*) fmt_ori->p + j*fmt_ori->n + iori; \ + src = fmt_ori->p + (j*fmt_ori->n + iori) * sizeof(src_type_t); \ tgt = (tgt_type_t *) ma->tmp_arr + (ismpl+j)*nsize + inew; \ if ( src_is_vector_end ) break; \ if ( src_is_missing ) tgt_set_missing; \ - else *tgt = *src; \ + else *tgt = 
convert(src); \ } \ } \ else \ @@ -2297,7 +2324,7 @@ void merge_format_field(args_t *args, bcf_fmt_t **fmt_map, missing_rule_t *mrule jnew = ma->buf[i].rec[irec].map[jori]; \ int kori = iori*(iori+1)/2 + jori; \ int knew = inew>jnew ? inew*(inew+1)/2 + jnew : jnew*(jnew+1)/2 + inew; \ - src = (src_type_t*) fmt_ori->p + j*fmt_ori->n + kori; \ + src = fmt_ori->p + (j*fmt_ori->n + kori) * sizeof(src_type_t); \ tgt = (tgt_type_t *) ma->tmp_arr + (ismpl+j)*nsize + knew; \ if ( src_is_vector_end ) \ { \ @@ -2305,7 +2332,7 @@ void merge_format_field(args_t *args, bcf_fmt_t **fmt_map, missing_rule_t *mrule break; \ } \ if ( src_is_missing ) tgt_set_missing; \ - else *tgt = *src; \ + else *tgt = convert(src); \ } \ } \ } \ @@ -2318,19 +2345,25 @@ void merge_format_field(args_t *args, bcf_fmt_t **fmt_map, missing_rule_t *mrule for (j=0; jtmp_arr + (ismpl+j)*nsize; \ - src = (src_type_t*) (fmt_ori->p + j*fmt_ori->size); \ - if ( (src_is_missing && fmt_ori->n==1) || (++src && src_is_vector_end) ) \ + src = fmt_ori->p + sizeof(src_type_t) * j * fmt_ori->size; \ + int tag_missing = src_is_missing && fmt_ori->n==1; \ + if (!tag_missing) { \ + src += sizeof(src_type_t); \ + tag_missing = src_is_vector_end ; \ + } \ + if ( tag_missing ) \ { \ /* tag with missing value "." 
*/ \ tgt_set_missing; \ for (l=1; lp + j*fmt_ori->size); \ + src = fmt_ori->p + sizeof(src_type_t) * j *fmt_ori->size; \ if ( ma->buf[i].unkn_allele ) /* Use value from the unknown allele when available */ \ { \ int iunkn = ma->buf[i].unkn_allele; \ - for (l=0; ltype==MERGE_MISSING_CONST ) \ { \ @@ -2338,9 +2371,13 @@ void merge_format_field(args_t *args, bcf_fmt_t **fmt_map, missing_rule_t *mrule } \ else if ( mrule && mrule->type==MERGE_MISSING_MAX ) \ { \ - src = (src_type_t*) fmt_ori->p + j*fmt_ori->n; \ - src_type_t max = src[0]; \ - for (l=1; ln; l++) if ( max < src[l] ) max = src[l]; \ + src = fmt_ori->p + sizeof(src_type_t)*j*fmt_ori->n; \ + src_type_t max = convert(src); \ + for (l=1; ln; l++) \ + { \ + src_type_t val = convert(&src[l * sizeof(src_type_t)]); \ + if ( max < val ) max = val; \ + } \ for (l=0; ltmp_arr + (ismpl+j)*nsize + inew; \ if ( src_is_vector_end ) break; \ if ( src_is_missing ) tgt_set_missing; \ - else *tgt = *src; \ - src++; \ + else *tgt = convert(src); \ + src += sizeof(src_type_t); \ } \ } \ } \ @@ -2363,10 +2400,10 @@ void merge_format_field(args_t *args, bcf_fmt_t **fmt_map, missing_rule_t *mrule } switch (type) { - case BCF_BT_INT8: BRANCH(int32_t, int8_t, *src==bcf_int8_missing, *src==bcf_int8_vector_end, *tgt=bcf_int32_missing, *tgt=bcf_int32_vector_end); break; - case BCF_BT_INT16: BRANCH(int32_t, int16_t, *src==bcf_int16_missing, *src==bcf_int16_vector_end, *tgt=bcf_int32_missing, *tgt=bcf_int32_vector_end); break; - case BCF_BT_INT32: BRANCH(int32_t, int32_t, *src==bcf_int32_missing, *src==bcf_int32_vector_end, *tgt=bcf_int32_missing, *tgt=bcf_int32_vector_end); break; - case BCF_BT_FLOAT: BRANCH(float, float, bcf_float_is_missing(*src), bcf_float_is_vector_end(*src), bcf_float_set_missing(*tgt), bcf_float_set_vector_end(*tgt)); break; + case BCF_BT_INT8: BRANCH(int32_t, int8_t, le_to_i8, le_to_i8(src)==bcf_int8_missing, le_to_i8(src)==bcf_int8_vector_end, *tgt=bcf_int32_missing, *tgt=bcf_int32_vector_end); break; + case 
BCF_BT_INT16: BRANCH(int32_t, int16_t, le_to_i16, le_to_i16(src)==bcf_int16_missing, le_to_i16(src)==bcf_int16_vector_end, *tgt=bcf_int32_missing, *tgt=bcf_int32_vector_end); break; + case BCF_BT_INT32: BRANCH(int32_t, int32_t, le_to_i32, le_to_i32(src)==bcf_int32_missing, le_to_i32(src)==bcf_int32_vector_end, *tgt=bcf_int32_missing, *tgt=bcf_int32_vector_end); break; + case BCF_BT_FLOAT: BRANCH(float, float, le_to_float, bcf_float_is_missing(le_to_float(src)), bcf_float_is_vector_end(le_to_float(src)), bcf_float_set_missing(*tgt), bcf_float_set_vector_end(*tgt)); break; default: error("Unexpected case: %d, %s\n", type, key); } #undef BRANCH @@ -2582,10 +2619,20 @@ void gvcf_write_block(args_t *args, int start, int end) } else bcf_update_info_int32(args->out_hdr, out, "END", NULL, 0); + + int iunseen; + if ( args->trim_star_allele && (out->n_allele > 2 || args->trim_star_allele > 1) && (iunseen=get_unseen_allele(out)) && iunseen>0 ) + { + // the unobserved star allele should be trimmed, either it is variant site or trimming of all sites was requested + kbitset_t *rm_set = kbs_init(out->n_allele); + kbs_insert(rm_set, iunseen); + if ( bcf_remove_allele_set(args->out_hdr,out,rm_set) ) + error("[%s] Error: failed to trim the unobserved allele at %s:%"PRIhts_pos"\n",__func__,bcf_seqname(args->out_hdr,out),out->pos+1); + kbs_destroy(rm_set); + } if ( bcf_write1(args->out_fh, args->out_hdr, out)!=0 ) error("[%s] Error: cannot write to %s\n", __func__,args->output_fname); bcf_clear1(out); - // Inactivate blocks which do not extend beyond END and find new gvcf_min min = INT_MAX; for (i=0; ifiles->nreaders; i++) @@ -3215,6 +3262,16 @@ void merge_line(args_t *args) if ( args->do_gvcf ) bcf_update_info_int32(args->out_hdr, out, "END", NULL, 0); merge_format(args, out); + int iunseen; + if ( args->trim_star_allele && (out->n_allele > 2 || args->trim_star_allele > 1) && (iunseen=get_unseen_allele(out)) && iunseen>0 ) + { + // the unobserved star allele should be trimmed, either 
it is variant site or trimming of all sites was requested + kbitset_t *rm_set = kbs_init(out->n_allele); + kbs_insert(rm_set, iunseen); + if ( bcf_remove_allele_set(args->out_hdr,out,rm_set) ) + error("[%s] Error: failed to trim the unobserved allele at %s:%"PRIhts_pos"\n",__func__,bcf_seqname(args->out_hdr,out),out->pos+1); + kbs_destroy(rm_set); + } if ( bcf_write1(args->out_fh, args->out_hdr, out)!=0 ) error("[%s] Error: cannot write to %s\n", __func__,args->output_fname); bcf_clear1(out); } @@ -3346,7 +3403,7 @@ void merge_vcf(args_t *args) } else if ( args->write_index && init_index(args->out_fh,args->out_hdr,args->output_fname,&args->index_fn)<0 ) error("Error: failed to initialise index for %s\n",args->output_fname); - if ( args->collapse==COLLAPSE_NONE ) args->vcmp = vcmp_init(); + args->vcmp = vcmp_init(); args->maux = maux_init(args); args->out_line = bcf_init1(); args->tmph = kh_init(strdict); @@ -3414,11 +3471,11 @@ static void usage(void) fprintf(stderr, " -0 --missing-to-ref Assume genotypes at missing sites are 0/0\n"); fprintf(stderr, " -f, --apply-filters LIST Require at least one of the listed FILTER strings (e.g. \"PASS,.\")\n"); fprintf(stderr, " -F, --filter-logic x|+ Remove filters if some input is PASS (\"x\"), or apply all filters (\"+\") [+]\n"); - fprintf(stderr, " -g, --gvcf -|REF.FA Merge gVCF blocks, INFO/END tag is expected. Implies -i QS:sum,MinDP:min,I16:sum,IDV:max,IMF:max -M PL:max,AD:0\n"); + fprintf(stderr, " -g, --gvcf -|REF.FA Merge gVCF blocks, INFO/END tag is expected. Implies -i QS:sum,MinDP:min,MIN_DP:min,I16:sum,IDV:max,IMF:max -M PL:max,AD:0\n"); fprintf(stderr, " -i, --info-rules TAG:METHOD,.. 
Rules for merging INFO fields (method is one of sum,avg,min,max,join) or \"-\" to turn off the default [DP:sum,DP4:sum]\n"); fprintf(stderr, " -l, --file-list FILE Read file names from the file\n"); - fprintf(stderr, " -L, --local-alleles INT EXPERIMENTAL: if more than ALT alleles are encountered, drop FMT/PL and output LAA+LPL instead; 0=unlimited [0]\n"); - fprintf(stderr, " -m, --merge STRING Allow multiallelic records for , see man page for details [both]\n"); + fprintf(stderr, " -L, --local-alleles INT If more than INT alt alleles are encountered, drop FMT/PL and output LAA+LPL instead; 0=unlimited [0]\n"); + fprintf(stderr, " -m, --merge STRING[*|**] Allow multiallelic records for snps,indels,both,snp-ins-del,all,none,id,*,**; see man page for details [both]\n"); fprintf(stderr, " -M, --missing-rules TAG:METHOD Rules for replacing missing values in numeric vectors (.,0,max) when unknown allele <*> is not present [.]\n"); fprintf(stderr, " --no-index Merge unindexed files, the same chromosomal order is required and -r/-R are not allowed\n"); fprintf(stderr, " --no-version Do not append version and command line to the header\n"); @@ -3427,7 +3484,7 @@ static void usage(void) fprintf(stderr, " -r, --regions REGION Restrict to comma-separated list of regions\n"); fprintf(stderr, " -R, --regions-file FILE Restrict to regions listed in a file\n"); fprintf(stderr, " --regions-overlap 0|1|2 Include if POS in the region (0), record overlaps (1), variant overlaps (2) [1]\n"); - fprintf(stderr, " --threads INT Use multithreading with worker threads [0]\n"); + fprintf(stderr, " --threads INT Use multithreading with INT worker threads [0]\n"); fprintf(stderr, " --write-index Automatically index the output files [off]\n"); fprintf(stderr, "\n"); exit(1); @@ -3520,17 +3577,23 @@ int main_vcfmerge(int argc, char *argv[]) } break; case 'm': + { + int len = strlen(optarg); + if ( optarg[len-1]=='*' ) { args->trim_star_allele++; len--; } + if ( optarg[len-1]=='*' ) { 
args->trim_star_allele++; len--; } + if ( optarg[len-1]==',' ) len--; args->collapse = COLLAPSE_NONE; - if ( !strcmp(optarg,"snps") ) args->collapse |= COLLAPSE_SNPS; - else if ( !strcmp(optarg,"indels") ) args->collapse |= COLLAPSE_INDELS; - else if ( !strcmp(optarg,"both") ) args->collapse |= COLLAPSE_BOTH; - else if ( !strcmp(optarg,"any") ) args->collapse |= COLLAPSE_ANY; - else if ( !strcmp(optarg,"all") ) args->collapse |= COLLAPSE_ANY; - else if ( !strcmp(optarg,"none") ) args->collapse = COLLAPSE_NONE; - else if ( !strcmp(optarg,"snp-ins-del") ) args->collapse = COLLAPSE_SNP_INS_DEL|COLLAPSE_SNPS; - else if ( !strcmp(optarg,"id") ) { args->collapse = COLLAPSE_NONE; args->merge_by_id = 1; } + if ( !strncmp(optarg,"snp-ins-del",len) ) args->collapse = COLLAPSE_SNP_INS_DEL|COLLAPSE_SNPS; + else if ( !strncmp(optarg,"snps",len) ) args->collapse |= COLLAPSE_SNPS; + else if ( !strncmp(optarg,"indels",len) ) args->collapse |= COLLAPSE_INDELS; + else if ( !strncmp(optarg,"id",len) ) { args->collapse = COLLAPSE_NONE; args->merge_by_id = 1; } + else if ( !strncmp(optarg,"any",len) ) args->collapse |= COLLAPSE_ANY; + else if ( !strncmp(optarg,"all",len) ) args->collapse |= COLLAPSE_ANY; + else if ( !strncmp(optarg,"both",len) ) args->collapse |= COLLAPSE_BOTH; + else if ( !strncmp(optarg,"none",len) ) args->collapse = COLLAPSE_NONE; else error("The -m type \"%s\" is not recognised.\n", optarg); break; + } case 'f': args->files->apply_filters = optarg; break; case 'r': args->regions_list = optarg; break; case 'R': args->regions_list = optarg; regions_is_file = 1; break; diff --git a/vcfnorm.c b/vcfnorm.c index 02ad322d1..fe77e2fc7 100644 --- a/vcfnorm.c +++ b/vcfnorm.c @@ -100,7 +100,7 @@ typedef struct struct { int tot, set, swap; } nref; char **argv, *output_fname, *ref_fname, *vcf_fname, *region, *targets; int argc, rmdup, output_type, n_threads, check_ref, strict_filter, do_indels, clevel; - int nchanged, nskipped, nsplit, ntotal, mrows_op, mrows_collapse, 
parsimonious; + int nchanged, nskipped, nsplit, njoined, ntotal, mrows_op, mrows_collapse, parsimonious; int record_cmd_line, force, force_warned, keep_sum_ad; abuf_t *abuf; abuf_opt_t atomize; @@ -559,6 +559,7 @@ static int realign(args_t *args, bcf1_t *line) hts_expand0(kstring_t,line->n_allele,args->ntmp_del,args->tmp_del); kstring_t *als = args->tmp_als; kstring_t *del = args->tmp_del; + int symbolic_alts = 1; for (i=0; in_allele; i++) { del[i].l = 0; @@ -576,12 +577,13 @@ static int realign(args_t *args, bcf1_t *line) replace_iupac_codes(ref,nref); // any non-ACGT character in fasta ref is replaced with N als[0].l = 0; kputs(ref, &als[0]); - als[i].l = 0; - kputsn(ref,1,&als[i]); - kputs(line->d.allele[i],&del[i]); - continue; } + als[i].l = 0; + kputsn(als[0].s,1,&als[i]); + kputs(line->d.allele[i],&del[i]); + continue; } + if ( i>0 ) symbolic_alts = 0; if ( line->d.allele[i][0]=='*' ) return ERR_SPANNING_DELETION; // spanning deletion if ( has_non_acgtn(line->d.allele[i],line->shared.l) ) { @@ -610,8 +612,15 @@ static int realign(args_t *args, bcf1_t *line) else new_pos = realign_right(args, line); - // Have the alleles changed? - als[0].s[ als[0].l ] = 0; // in order for strcmp to work + // Have the alleles changed? Consider could have expanded the REF allele. In that + // case it must be trimmed, however the new REF length must reflect the entire length. 
+ als[0].s[ als[0].l ] = 0; // for strcmp to work + int new_reflen = strlen(als[0].s); + if ( symbolic_alts ) + { + als[0].l = 1; + als[0].s[ als[0].l ] = 0; + } if ( new_pos==line->pos && !strcasecmp(line->d.allele[0],als[0].s) ) return ERR_OK; set_old_rec_tag(args, line, line, 0); @@ -629,7 +638,6 @@ static int realign(args_t *args, bcf1_t *line) args->nchanged++; // Update INFO/END if necessary - int new_reflen = strlen(line->d.allele[0]); if ( (new_pos!=line->pos || reflen!=new_reflen) && bcf_get_info_int32(args->hdr, line, "END", &args->int32_arr, &args->nint32_arr)==1 ) { // bcf_update_alleles_str() messed up rlen because line->pos changed. This will be fixed by bcf_update_info_int32() @@ -1822,6 +1830,7 @@ static void merge_biallelics_to_multiallelic(args_t *args, bcf1_t *dst, bcf1_t * else if ( type==BCF_HT_INT || type==BCF_HT_REAL ) merge_format_numeric(args, lines, nlines, fmt, dst); else merge_format_string(args, lines, nlines, fmt, dst); } + args->njoined++; } #define SWAP(type_t, a, b) { type_t t = a; a = b; b = t; } @@ -2020,7 +2029,7 @@ static void init_data(args_t *args) else args->keep_sum_ad = -1; - args->out_hdr = bcf_hdr_dup(args->hdr); + args->out_hdr = args->hdr; if ( args->old_rec_tag ) bcf_hdr_printf(args->out_hdr,"##INFO=",args->old_rec_tag); @@ -2042,7 +2051,10 @@ static void init_data(args_t *args) args->abuf = abuf_init(args->hdr, SPLIT); abuf_set_opt(args->abuf, bcf_hdr_t*, BCF_HDR, args->out_hdr); if ( args->old_rec_tag ) + { abuf_set_opt(args->abuf, const char*, INFO_TAG, args->old_rec_tag); + if ( bcf_hdr_sync(args->out_hdr)!=0 ) error("bcf_hdr_sync failed\n"); + } abuf_set_opt(args->abuf, int, STAR_ALLELE, args->use_star_allele); } if ( args->gff_fname ) @@ -2054,6 +2066,7 @@ static void init_data(args_t *args) args->idx_tscript = gff_get(args->gff,idx_tscript); args->itr_tscript = regitr_init(NULL); } + args->out_hdr = bcf_hdr_dup(args->out_hdr); } static void destroy_data(args_t *args) @@ -2249,7 +2262,7 @@ static void 
normalize_vcf(args_t *args) } if ( hts_close(args->out)!=0 ) error("[%s] Error: close failed .. %s\n", __func__,args->output_fname); - fprintf(stderr,"Lines total/split/realigned/skipped:\t%d/%d/%d/%d\n", args->ntotal,args->nsplit,args->nchanged,args->nskipped); + fprintf(stderr,"Lines total/split/joined/realigned/skipped:\t%d/%d/%d/%d/%d\n", args->ntotal,args->nsplit,args->njoined,args->nchanged,args->nskipped); if ( args->check_ref & CHECK_REF_FIX ) fprintf(stderr,"REF/ALT total/modified/added: \t%d/%d/%d\n", args->nref.tot,args->nref.swap,args->nref.set); } @@ -2286,7 +2299,7 @@ static void usage(void) fprintf(stderr, " -t, --targets REGION Similar to -r but streams rather than index-jumps\n"); fprintf(stderr, " -T, --targets-file FILE Similar to -R but streams rather than index-jumps\n"); fprintf(stderr, " --targets-overlap 0|1|2 Include if POS in the region (0), record overlaps (1), variant overlaps (2) [0]\n"); - fprintf(stderr, " --threads INT Use multithreading with worker threads [0]\n"); + fprintf(stderr, " --threads INT Use multithreading with INT worker threads [0]\n"); fprintf(stderr, " -w, --site-win INT Buffer for sorting lines which changed position during realignment [1000]\n"); fprintf(stderr, " --write-index Automatically index the output files [off]\n"); fprintf(stderr, "\n"); diff --git a/vcfquery.c b/vcfquery.c index 5f4eb07c6..b741b5d1e 100644 --- a/vcfquery.c +++ b/vcfquery.c @@ -55,7 +55,8 @@ typedef struct bcf_hdr_t *header; int sample_is_file; char **argv, *format_str, *sample_list, *targets_list, *regions_list, *vcf_list, *fn_out; - int argc, list_columns, print_header, allow_undef_tags, force_samples; + char *print_filtered; + int argc, list_columns, print_header, allow_undef_tags, force_samples, force_newline; FILE *out; } args_t; @@ -94,7 +95,7 @@ static void init_data(args_t *args) smpl_ilist_destroy(ilist); } args->convert = convert_init(args->header, samples, nsamples, args->format_str); - convert_set_option(args->convert, 
force_newline, 1); + if ( args->force_newline ) convert_set_option(args->convert, force_newline, 1); convert_set_option(args->convert, subset_samples, &args->smpl_pass); if ( args->allow_undef_tags ) convert_set_option(args->convert, allow_undef_tags, 1); free(samples); @@ -106,6 +107,9 @@ static void init_data(args_t *args) max_unpack |= filter_max_unpack(args->filter); } args->files->max_unpack = max_unpack; + if ( !args->filter || args->print_filtered || !(filter_max_unpack(args->filter) & BCF_UN_FMT) ) + convert_set_option(args->convert, header_samples, 1); + if ( args->print_filtered ) convert_set_option(args->convert, print_filtered, args->print_filtered); } static void destroy_data(args_t *args) @@ -232,10 +236,12 @@ static void usage(void) fprintf(stderr, "Options:\n"); fprintf(stderr, " -e, --exclude EXPR Exclude sites for which the expression is true (see man page for details)\n"); fprintf(stderr, " --force-samples Only warn about unknown subset samples\n"); + fprintf(stderr, " -F, --print-filtered STR Output STR for samples failing the -i/-e filtering expression\n"); fprintf(stderr, " -f, --format STRING See man page for details\n"); fprintf(stderr, " -H, --print-header Print header\n"); fprintf(stderr, " -i, --include EXPR Select sites for which the expression is true (see man page for details)\n"); fprintf(stderr, " -l, --list-samples Print the list of samples and exit\n"); + fprintf(stderr, " -N, --disable-automatic-newline Disable automatic addition of newline character when not present\n"); fprintf(stderr, " -o, --output FILE Output file name [stdout]\n"); fprintf(stderr, " -r, --regions REGION Restrict to comma-separated list of regions\n"); fprintf(stderr, " -R, --regions-file FILE Restrict to regions listed in a file\n"); @@ -259,6 +265,7 @@ int main_vcfquery(int argc, char *argv[]) int c, collapse = 0; args_t *args = (args_t*) calloc(1,sizeof(args_t)); args->argc = argc; args->argv = argv; + args->force_newline = 1; int regions_is_file = 0, 
targets_is_file = 0; int regions_overlap = 1; int targets_overlap = 0; @@ -267,8 +274,10 @@ int main_vcfquery(int argc, char *argv[]) { {"help",0,0,'h'}, {"list-samples",0,0,'l'}, + {"disable-automatic-newline",required_argument,NULL,'N'}, {"include",1,0,'i'}, {"exclude",1,0,'e'}, + {"print-filtered",1,0,'F'}, {"format",1,0,'f'}, {"force-samples",0,0,3}, {"output-file",1,0,'o'}, @@ -288,10 +297,12 @@ int main_vcfquery(int argc, char *argv[]) {"allow-undef-tags",0,0,'u'}, {0,0,0,0} }; - while ((c = getopt_long(argc, argv, "hlr:R:f:a:s:S:Ht:T:c:v:i:e:o:u",loptions,NULL)) >= 0) { + while ((c = getopt_long(argc, argv, "hlr:R:F:f:a:s:S:Ht:T:c:v:i:e:o:uN",loptions,NULL)) >= 0) { switch (c) { case 'o': args->fn_out = optarg; break; + case 'F': args->print_filtered = optarg; break; case 'f': args->format_str = strdup(optarg); break; + case 'N': args->force_newline = 0; break; case 'H': args->print_header = 1; break; case 'v': args->vcf_list = optarg; break; case 'c': diff --git a/vcfsort.c b/vcfsort.c index 3b208a0d3..d0dc5b17e 100644 --- a/vcfsort.c +++ b/vcfsort.c @@ -337,7 +337,6 @@ static void usage(args_t *args) fprintf(stderr, "Options:\n"); fprintf(stderr, " -m, --max-mem FLOAT[kMG] maximum memory to use [768M]\n"); // using metric units, 1M=1e6 fprintf(stderr, " -o, --output FILE output file name [stdout]\n"); - fprintf(stderr, " -O, --output-type b|u|z|v b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]\n"); fprintf(stderr, " -O, --output-type u|b|v|z[0-9] u/b: un/compressed BCF, v/z: un/compressed VCF, 0-9: compression level [v]\n"); #ifdef _WIN32 diff --git a/vcfview.c b/vcfview.c index e09efa0bc..68aaeab00 100644 --- a/vcfview.c +++ b/vcfview.c @@ -36,6 +36,7 @@ THE SOFTWARE. 
*/ #include #include #include +#include #include "bcftools.h" #include "filter.h" #include "htslib/khash_str2int.h" @@ -78,6 +79,7 @@ typedef struct _args_t int record_cmd_line; char *index_fn; int write_index; + int trim_star_allele; htsFile *out; } args_t; @@ -456,6 +458,19 @@ int subset_vcf(args_t *args, bcf1_t *line) int ret = bcf_trim_alleles(args->hsub ? args->hsub : args->hdr, line); if ( ret<0 ) error("Error: Could not trim alleles at %s:%"PRId64"\n", bcf_seqname(args->hsub ? args->hsub : args->hdr, line), (int64_t) line->pos+1); } + if (args->trim_star_allele) + { + int iunseen; + if ( args->trim_star_allele && (line->n_allele > 2 || args->trim_star_allele > 1) && (iunseen=get_unseen_allele(line)) && iunseen>0 ) + { + // the unobserved star allele should be trimmed, either it is variant site or trimming of all sites was requested + kbitset_t *rm_set = kbs_init(line->n_allele); + kbs_insert(rm_set, iunseen); + if ( bcf_remove_allele_set(args->hdr,line,rm_set) ) + error("[%s] Error: failed to trim the unobserved allele at %s:%"PRIhts_pos"\n",__func__,bcf_seqname(args->hdr,line),line->pos+1); + kbs_destroy(rm_set); + } + } if (args->phased) { int phased = bcf_all_phased(args->hdr, line); if (args->phased == FLT_INCLUDE && !phased) { return 0; } // skip unphased @@ -512,6 +527,7 @@ static void usage(args_t *args) fprintf(stderr, " --threads INT Use multithreading with INT worker threads [0]\n"); fprintf(stderr, "\n"); fprintf(stderr, "Subset options:\n"); + fprintf(stderr, " -A, --trim-unseen-allele Remove '<*>' or '' at variant (-A) or at all (-AA) sites\n"); fprintf(stderr, " -a, --trim-alt-alleles Trim ALT alleles not seen in the genotype fields (or their subset with -s/-S)\n"); fprintf(stderr, " -I, --no-update Do not (re)calculate INFO fields for the subset (currently INFO/AC and INFO/AN)\n"); fprintf(stderr, " -s, --samples [^]LIST Comma separated list of samples to include (or exclude with \"^\" prefix). 
Be careful\n"); @@ -568,6 +584,7 @@ int main_vcfview(int argc, char *argv[]) {"exclude",required_argument,NULL,'e'}, {"include",required_argument,NULL,'i'}, {"trim-alt-alleles",no_argument,NULL,'a'}, + {"trim-unseen-allele",no_argument,NULL,'A'}, {"no-update",no_argument,NULL,'I'}, {"drop-genotypes",no_argument,NULL,'G'}, {"private",no_argument,NULL,'x'}, @@ -604,7 +621,7 @@ int main_vcfview(int argc, char *argv[]) {NULL,0,NULL,0} }; char *tmp; - while ((c = getopt_long(argc, argv, "l:t:T:r:R:o:O:s:S:Gf:knv:V:m:M:auUhHc:C:Ii:e:xXpPq:Q:g:",loptions,NULL)) >= 0) + while ((c = getopt_long(argc, argv, "l:t:T:r:R:o:O:s:S:Gf:knv:V:m:M:aAuUhHc:C:Ii:e:xXpPq:Q:g:",loptions,NULL)) >= 0) { char allele_type[9] = "nref"; switch (c) @@ -646,6 +663,7 @@ int main_vcfview(int argc, char *argv[]) case 'S': args->sample_names = optarg; args->sample_is_file = 1; break; case 1 : args->force_samples = 1; break; case 'a': args->trim_alts = 1; args->calc_ac = 1; break; + case 'A': args->trim_star_allele++; break; case 'I': args->update_info = 0; break; case 'G': args->sites_only = 1; break; diff --git a/version.sh b/version.sh index 69bf963de..1b1372259 100755 --- a/version.sh +++ b/version.sh @@ -24,7 +24,7 @@ # DEALINGS IN THE SOFTWARE. # Master version, for use in tarballs or non-git source copies -VERSION=1.18 +VERSION=1.19 # If we have a git clone, then check against the current tag if [ -e .git ]