Skip to content

Commit

Permalink
Release 1.15
Browse files Browse the repository at this point in the history
  • Loading branch information
daviesrob committed Feb 21, 2022
2 parents 5f1bf7a + 310cd8c commit 580b52c
Show file tree
Hide file tree
Showing 110 changed files with 3,025 additions and 885 deletions.
2 changes: 1 addition & 1 deletion .appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ install:
- set MSYSTEM=MINGW64
- set PATH=C:/msys64/usr/bin;C:/msys64/mingw64/bin;%PATH%
- set MINGWPREFIX=x86_64-w64-mingw32
- "sh -lc \"pacman -S --noconfirm --needed base-devel mingw-w64-x86_64-toolchain mingw-w64-x86_64-zlib mingw-w64-x86_64-bzip2 mingw-w64-x86_64-xz mingw-w64-x86_64-curl\""
- "sh -lc \"pacman -S --noconfirm --needed base-devel mingw-w64-x86_64-toolchain mingw-w64-x86_64-autotools mingw-w64-x86_64-zlib mingw-w64-x86_64-bzip2 mingw-w64-x86_64-xz mingw-w64-x86_64-curl\""

# The user may have e.g. jkbonfield/bcftools branch FOO and an associated
# jkbonfield/htslib branch FOO. If so use that related htslib, obtained by
Expand Down
8 changes: 4 additions & 4 deletions .cirrus.yml
Original file line number Diff line number Diff line change
Expand Up @@ -109,11 +109,11 @@ ubuntu_task:
<< : *TEST


# CentOS
centos_task:
name: centos-gcc
# Rocky Linux
rockylinux_task:
name: rockylinux-gcc
container:
image: centos:latest
image: rockylinux:latest
cpu: 2
memory: 1G

Expand Down
29 changes: 27 additions & 2 deletions INSTALL
Original file line number Diff line number Diff line change
Expand Up @@ -218,16 +218,22 @@ Note: libcurl4-openssl-dev can be used as an alternative to libcurl4-gnutls-dev.
RedHat / CentOS
---------------

Note: To install gsl-devel, it may be necessary to enable the "crb" repository.
dnf --enablerepo=crb install gsl-devel

sudo yum install autoconf automake make gcc perl-Data-Dumper zlib-devel bzip2 bzip2-devel xz-devel curl-devel openssl-devel gsl-devel perl-ExtUtils-Embed

Note: On some versions, Perl FindBin will need to be installed to make the tests work.
sudo yum install perl-FindBin

Alpine Linux
------------

Note: To install gsl-dev, it may be necessary to enable the "community"
repository in /etc/apk/repositories.

sudo apk update # Ensure the package list is up to date
sudo apk add autoconf automake make gcc musl-dev perl bash zlib-dev bzip2-dev xz-dev curl-dev libressl-dev gsl-dev perl-dev
doas apk update # Ensure the package list is up to date
doas apk add autoconf automake make gcc musl-dev perl bash zlib-dev bzip2-dev xz-dev curl-dev libressl-dev gsl-dev perl-dev

OpenSUSE
--------
Expand All @@ -240,4 +246,23 @@ MacOS, assuming Xcode is installed:
xz
gsl (optional)

Windows MSYS2/MINGW64
---------------------

The configure script must be used, as without it compilation will
likely fail.

Follow MSYS2 installation instructions at
https://www.msys2.org/wiki/MSYS2-installation/

Then relaunch the MSYS2 shell using the "MSYS2 MinGW x64" executable.
Once in that environment (check $MSYSTEM equals "MINGW64") install the
compilers using pacman -S and the following package list:

base-devel mingw-w64-x86_64-toolchain
mingw-w64-x86_64-libdeflate mingw-w64-x86_64-zlib mingw-w64-x86_64-bzip2
mingw-w64-x86_64-xz mingw-w64-x86_64-curl mingw-w64-x86_64-autotools
mingw-w64-x86_64-tools-git

(The last is only needed for building libraries compatible with MSVC.)

11 changes: 6 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ OBJS = main.o vcfindex.o tabix.o \
vcfstats.o vcfisec.o vcfmerge.o vcfquery.o vcffilter.o filter.o vcfsom.o \
vcfnorm.o vcfgtcheck.o vcfview.o vcfannotate.o vcfroh.o vcfconcat.o \
vcfcall.o mcall.o vcmp.o gvcf.o reheader.o convert.o vcfconvert.o tsv2vcf.o \
vcfcnv.o HMM.o consensus.o ploidy.o bin.o hclust.o version.o \
vcfcnv.o vcfhead.o HMM.o consensus.o ploidy.o bin.o hclust.o version.o \
regidx.o smpl_ilist.o csq.o vcfbuf.o \
mpileup.o bam2bcf.o bam2bcf_indel.o bam_sample.o \
vcfsort.o cols.o extsort.o dist.o abuf.o \
Expand Down Expand Up @@ -104,7 +104,7 @@ endif

include config.mk

PACKAGE_VERSION = 1.14
PACKAGE_VERSION = 1.15

# If building from a Git repository, replace $(PACKAGE_VERSION) with the Git
# description of the working tree: either a release tag with the same value
Expand Down Expand Up @@ -217,7 +217,7 @@ bcftools: $(OBJS) $(HTSLIB)

plugins: $(PLUGINS)

bcftools_h = bcftools.h $(htslib_hts_defs_h) $(htslib_vcf_h)
bcftools_h = bcftools.h $(htslib_hts_defs_h) $(htslib_vcf_h) $(htslib_synced_bcf_reader_h)
call_h = call.h $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) vcmp.h
variantkey_h = variantkey.h hex.h
convert_h = convert.h $(htslib_vcf_h)
Expand All @@ -240,15 +240,16 @@ vcfplugin.o: vcfplugin.c config.h $(htslib_vcf_h) $(htslib_synced_bcf_reader_h)
vcfcall.o: vcfcall.c $(htslib_vcf_h) $(htslib_kfunc_h) $(htslib_synced_bcf_reader_h) $(htslib_khash_str2int_h) $(bcftools_h) $(call_h) $(prob1_h) $(ploidy_h) $(gvcf_h) regidx.h $(vcfbuf_h)
vcfconcat.o: vcfconcat.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kseq_h) $(htslib_bgzf_h) $(htslib_tbx_h) $(htslib_thread_pool_h) $(bcftools_h)
vcfconvert.o: vcfconvert.c $(htslib_faidx_h) $(htslib_vcf_h) $(htslib_bgzf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_kseq_h) $(bcftools_h) $(filter_h) $(convert_h) $(tsv2vcf_h)
vcffilter.o: vcffilter.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(bcftools_h) $(filter_h) rbuf.h
vcffilter.o: vcffilter.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(bcftools_h) $(filter_h) rbuf.h regidx.h
vcfgtcheck.o: vcfgtcheck.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_kbitset_h) $(htslib_hts_os_h) $(bcftools_h) extsort.h
vcfindex.o: vcfindex.c $(htslib_vcf_h) $(htslib_tbx_h) $(htslib_kstring_h) $(htslib_bgzf_h) $(bcftools_h)
vcfisec.o: vcfisec.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_hts_os_h) $(bcftools_h) $(filter_h)
vcfmerge.o: vcfmerge.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_faidx_h) regidx.h $(bcftools_h) vcmp.h $(htslib_khash_h)
vcfnorm.o: vcfnorm.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_faidx_h) $(htslib_khash_str2int_h) $(bcftools_h) rbuf.h abuf.h
vcfquery.o: vcfquery.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_khash_str2int_h) $(htslib_vcfutils_h) $(bcftools_h) $(filter_h) $(convert_h)
vcfquery.o: vcfquery.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_khash_str2int_h) $(htslib_vcfutils_h) $(bcftools_h) $(filter_h) $(convert_h) $(smpl_ilist_h)
vcfroh.o: vcfroh.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kstring_h) $(htslib_kseq_h) $(htslib_bgzf_h) $(bcftools_h) HMM.h $(smpl_ilist_h) $(filter_h)
vcfcnv.o: vcfcnv.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kstring_h) $(htslib_kfunc_h) $(htslib_khash_str2int_h) $(bcftools_h) HMM.h rbuf.h
vcfhead.o: vcfhead.c $(htslib_kstring_h) $(htslib_vcf_h) $(bcftools_h)
vcfsom.o: vcfsom.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_hts_os_h) $(bcftools_h)
vcfsort.o: vcfsort.c $(htslib_vcf_h) $(htslib_kstring_h) $(htslib_hts_os_h) kheap.h $(bcftools_h)
vcfstats.o: vcfstats.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_faidx_h) $(bcftools_h) $(filter_h) bin.h dist.h
Expand Down
112 changes: 104 additions & 8 deletions NEWS
Original file line number Diff line number Diff line change
@@ -1,10 +1,106 @@
## Release 1.15 (21st February 2022)


* New `bcftools head` subcommand for conveniently displaying the headers
of a VCF or BCF file. Without any options, this is equivalent to
`bcftools view --header-only --no-version` but more succinct and memorable.

* The `-T, --targets-file` option had the following bug originating in HTSlib code:
when an uncompressed file with multiple columns CHR,POS,REF was provided, the
REF would be interpreted as 0 gigabases (#1598)

Changes affecting specific commands:

* bcftools annotate

- In addition to `--rename-annots`, which requires a file with name mappings,
it is now possible to do the same on the command line `-c NEW_TAG:=OLD_TAG`

- Add new option --min-overlap which allows specifying the minimum required
overlap of intersecting regions

- Allow transferring ALT from a VCF, with or without replacement, using
bcftools annotate -a annots.vcf.gz -c ALT file.vcf.gz
bcftools annotate -a annots.vcf.gz -c +ALT file.vcf.gz

* bcftools convert

- Revamp of `--gensample`, `--hapsample` and `--haplegendsample` family of options
which includes the following changes:

- New `--3N6` option to output/input the new version of the .gen file format,
see https://www.cog-genomics.org/plink/2.0/formats#gen

- Deprecate the `--chrom` option in favor of `--3N6`. A simple `cut` command
can be used to convert from the new 3*M+6 column format to the format printed
with `--chrom` (`cut -d' ' -f1,3-`).

- The CHROM:POS_REF_ALT IDs which are used to detect strand swaps are required
and must appear either in the "SNP ID" column or the "rsID" column. The column
is autodetected for `--gensample2vcf`, can be the first or the second for
`--hapsample2vcf` (depending on whether the `--vcf-ids` option is given), must be
the first for `--haplegendsample2vcf`.

* bcftools csq

- Allow GFF files with phase column unset

* bcftools filter

- New `--mask`, `--mask-file` and `--mask-overlap` options to soft filter
variants in regions (#1635)

* bcftools +fixref

- The `-m id` option now works also for non-dbSNP ids, i.e. not just `rsINT`

- New `-m flip-all` mode for flipping all sites, including ambiguous A/T and C/G sites

* bcftools isec

- Prevent segfault on sites filtered with -i/-e in all files (#1632)

* bcftools mpileup

- More flexible read filtering using the options
--ls, --skip-all-set .. skip reads with all of the FLAG bits set
--ns, --skip-any-set .. skip reads with any of the FLAG bits set
--lu, --skip-all-unset .. skip reads with all of the FLAG bits unset
--nu, --skip-any-unset .. skip reads with any of the FLAG bits unset

The existing synonymous options will continue to function but their use
is discouraged
--rf, --incl-flags STR|INT Required flags: skip reads with mask bits unset
--ff, --excl-flags STR|INT Filter flags: skip reads with mask bits set

* bcftools query

- Make the `--samples` and `--samples-file` options also work in the `--list-samples`
mode. Add a new `--force-samples` option which allows proceeding even when some of
the requested samples are not present in the VCF (#1631)

* bcftools +setGT

- Fix a bug in `-t q -e EXPR` logic applied on FORMAT fields, sites with all
samples failing the expression EXPR were incorrectly skipped. This problem
affected only the use of `-e` logic, not the `-i` expressions (#1607)

* bcftools sort

- make use of the TMPDIR environment variable when defined

* bcftools +trio-dnm2

- The --use-NAIVE mode now also adds the de novo allele in FORMAT/VA


## Release 1.14 (22nd October 2021)


Changes affecting the whole of bcftools, or multiple commands:

* New `--regions-overlap` and `--targets-overlap` options which address
a long-standing design problem with subsetting VCF files by region.
a long-standing design problem with subsetting VCF files by region.
BCFtools recognizes two sets of options, one for streaming (`-t/-T`) and
one for index-jumping (`-r/-R`). They behave differently: the first
includes only records with POS coordinate within the regions, the other
Expand Down Expand Up @@ -32,11 +128,11 @@ Changes affecting specific commands:
by using `-c INFO/END`.

- add a new '.' modifier to control whether missing values should be carried
over from a tab-delimited file or not. For example:
over from a tab-delimited file or not. For example:

-c TAG .. adds TAG if the source value is not missing. If TAG
exists in the target file, it will be overwritten

-c .TAG .. adds TAG even if the source value is missing. This
can overwrite non-missing values with a missing value
and can create empty VCF fields (`TAG=.`)
Expand Down Expand Up @@ -165,7 +261,7 @@ Changes affecting specific commands:
* bcftools +fill-tags:

- Generalization and better support for custom functions that allow
adding new INFO tags based on arbitrary `-i, --include` type of
adding new INFO tags based on arbitrary `-i, --include` type of
expressions. For example, to calculate a missing INFO/DP annotation
from FORMAT/AD, it is possible to use:

Expand Down Expand Up @@ -229,7 +325,7 @@ Changes affecting specific commands:

- Atomization of AD and QS tags now correctly updates occurrences of duplicate
alleles within different haplotypes

- Fix a bug in atomization of Number=A,R tags

* bcftools reheader:
Expand All @@ -241,7 +337,7 @@ Changes affecting specific commands:
- A wider range of genotypes can be set by the plugin by allowing
specifying custom genotypes. For example, to force a heterozygous
genotype it is now possible to use expressions like:

c:'m|M'
c:0/1
c:0
Expand All @@ -253,7 +349,7 @@ Changes affecting specific commands:
- Better handling of ambiguous keys such as INFO/AF and CSQ/AD. The
`-p, --annot-prefix` option is now applied before doing anything else
which allows its use with `-f, --format` and `-c, --columns` options.

- Some consequence field names may not constitute a valid tag name, such
as "pos(1-based)". Field names are now trimmed to exclude brackets.

Expand Down Expand Up @@ -383,7 +479,7 @@ Changes affecting specific commands:

* bcftools csq:

- Fix a bug which caused incorrect FORMAT/BCSQ formatting at sites with too
- Fix a bug which caused incorrect FORMAT/BCSQ formatting at sites with too
many per-sample consequences

- Fix a bug which incorrectly handled the --ncsq parameter and could clash
Expand Down
6 changes: 3 additions & 3 deletions bam_sample.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/* bam_sample.c -- group data by sample.
Copyright (C) 2010, 2011 Broad Institute.
Copyright (C) 2013, 2016-2018 Genome Research Ltd.
Copyright (C) 2013, 2016-2022 Genome Research Ltd.
Author: Heng Li <[email protected]>, Petr Danecek <[email protected]>
Expand Down Expand Up @@ -281,7 +281,7 @@ int bam_smpl_add_samples(bam_smpl_t *bsmpl, char *list, int is_file)

int i, nsamples = 0;
char **samples = hts_readlist(list, is_file, &nsamples);
if ( !nsamples ) return 0;
if ( !samples || !nsamples ) return 0;

kstring_t ori = {0,0,0};
kstring_t ren = {0,0,0};
Expand Down Expand Up @@ -328,7 +328,7 @@ int bam_smpl_add_readgroups(bam_smpl_t *bsmpl, char *list, int is_file)

int i, nrows = 0;
char **rows = hts_readlist(list, is_file, &nrows);
if ( !nrows ) return 0;
if ( !rows || !nrows ) return 0;

kstring_t fld1 = {0,0,0};
kstring_t fld2 = {0,0,0};
Expand Down
9 changes: 6 additions & 3 deletions bcftools.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* bcftools.h -- utility function declarations.
Copyright (C) 2013-2021 Genome Research Ltd.
Copyright (C) 2013-2022 Genome Research Ltd.
Author: Petr Danecek <[email protected]>
Expand Down Expand Up @@ -28,6 +28,7 @@ THE SOFTWARE. */
#include <stdarg.h>
#include <htslib/hts_defs.h>
#include <htslib/vcf.h>
#include <htslib/synced_bcf_reader.h>
#include <math.h>

#define FT_TAB_TEXT 0 // custom tab-delimited text file
Expand All @@ -50,9 +51,11 @@ void error_errno(const char *format, ...) HTS_NORETURN HTS_FORMAT(HTS_PRINTF_FMT

void bcf_hdr_append_version(bcf_hdr_t *hdr, int argc, char **argv, const char *cmd);
const char *hts_bcf_wmode(int file_type);
const char *hts_bcf_wmode2(int file_type, char *fname);
void set_wmode(char dst[8], int file_type, char *fname, int compression_level); // clevel: 0-9 with or zb type, -1 unset
const char *hts_bcf_wmode2(int file_type, const char *fname);
void set_wmode(char dst[8], int file_type, const char *fname, int compression_level); // clevel: 0-9 with or zb type, -1 unset
char *init_tmp_prefix(const char *prefix);
int read_AF(bcf_sr_regions_t *tgt, bcf1_t *line, double *alt_freq);
int parse_overlap_option(const char *arg);

void *smalloc(size_t size); // safe malloc

Expand Down
3 changes: 2 additions & 1 deletion bin.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* The MIT License
Copyright (c) 2016 Genome Research Ltd.
Copyright (c) 2016-2022 Genome Research Ltd.
Author: Petr Danecek <[email protected]>
Expand Down Expand Up @@ -43,6 +43,7 @@ bin_t *bin_init(const char *list_def, float min, float max)
int is_file = strchr(list_def,',') ? 0 : 1;
int i, nlist;
char **list = hts_readlist(list_def, is_file, &nlist);
if ( !list ) error("Error: failed to read %s\n",list_def);
bin->nbins = nlist;
bin->bins = (float*) malloc(sizeof(float)*nlist);
for (i=0; i<nlist; i++)
Expand Down
13 changes: 8 additions & 5 deletions consensus.c
Original file line number Diff line number Diff line change
Expand Up @@ -733,11 +733,14 @@ static void apply_variant(args_t *args, bcf1_t *rec)
if ( rec->rlen > args->fa_buf.l - idx )
{
rec->rlen = args->fa_buf.l - idx;
alen = strlen(alt_allele);
if ( alen > rec->rlen )
if ( alt_allele[0]!='<' )
{
alt_allele[rec->rlen] = 0;
fprintf(stderr,"Warning: trimming variant starting at %s:%"PRId64"\n", bcf_seqname(args->hdr,rec),(int64_t) rec->pos+1);
alen = strlen(alt_allele);
if ( alen > rec->rlen )
{
fprintf(stderr,"Warning: trimming variant \"%s\" starting at %s:%"PRId64"\n", alt_allele,bcf_seqname(args->hdr,rec),(int64_t) rec->pos+1);
alt_allele[rec->rlen] = 0;
}
}
}
if ( idx>=args->fa_buf.l )
Expand All @@ -749,7 +752,7 @@ static void apply_variant(args_t *args, bcf1_t *rec)
// TODO: symbolic deletions probably need more work above with PICK_SHORT|PICK_LONG

if ( strcasecmp(alt_allele,"<DEL>") && strcasecmp(alt_allele,"<*>") && strcasecmp(alt_allele,"<NON_REF>") )
error("Symbolic alleles other than <DEL>, <*> or <NON_REF> are currently not supported, e.g. %s at %s:%"PRId64".\n"
error("Symbolic alleles other than <DEL>, <*> or <NON_REF> are currently not supported, e.g. \"%s\" at %s:%"PRId64".\n"
"Please use filtering expressions to exclude such sites, for example by running with: -e 'ALT~\"<.*>\"'\n",
alt_allele,bcf_seqname(args->hdr,rec),(int64_t) rec->pos+1);
if ( !strcasecmp(alt_allele,"<DEL>") )
Expand Down
Loading

0 comments on commit 580b52c

Please sign in to comment.