From 294600f8a648c659c61e828ce3a6963de5d539f2 Mon Sep 17 00:00:00 2001 From: Dominika Maziec Date: Wed, 27 Jul 2022 17:35:52 -0400 Subject: [PATCH 01/21] picard liftover --- cwl/picard_liftover.cwl | 68 ++++++++++++++++ cwl/preprocess_liftover.cwl | 49 ++++++++++++ cwl/workflow_picard_liftover.cwl | 66 ++++++++++++++++ dockerfiles/picard_liftover_vcf/Dockerfile | 23 ++++++ .../scripts/preprocess_liftover.py | 79 +++++++++++++++++++ 5 files changed, 285 insertions(+) create mode 100644 cwl/picard_liftover.cwl create mode 100644 cwl/preprocess_liftover.cwl create mode 100644 cwl/workflow_picard_liftover.cwl create mode 100644 dockerfiles/picard_liftover_vcf/Dockerfile create mode 100644 dockerfiles/picard_liftover_vcf/scripts/preprocess_liftover.py diff --git a/cwl/picard_liftover.cwl b/cwl/picard_liftover.cwl new file mode 100644 index 0000000..74837ec --- /dev/null +++ b/cwl/picard_liftover.cwl @@ -0,0 +1,68 @@ +#!/usr/bin/env cwl-runner + +cwlVersion: v1.0 + +class: CommandLineTool + +requirements: + - class: InlineJavascriptRequirement + +hints: + - class: DockerRequirement + dockerPull: ACCOUNT/picard_liftover_vcf:VERSION + +baseCommand: [gatk, LiftoverVcf] + +inputs: + - id: vcf + type: File + inputBinding: + prefix: -I + + doc: expect the path to the input vcf + + - id: reference_sequence + type: string + inputBinding: + prefix: -R + + doc: the reference sequence for the target genome build + + - id: reject + type: string + default: "reject.vcf" + inputBinding: + prefix: --REJECT + + doc: file to which to write rejected records + + - id: output_vcf + type: string + default: "output.vcf" + inputBinding: + prefix: -O + + doc: base name of output vcf file + + - id: chain + type: File + inputBinding: + prefix: -C + doc: the liftover chain file + + +outputs: + - id: output + type: File + outputBinding: + glob: $(inputs.output_vcf) + + - id: output_reject + type: File + outputBinding: + glob: $(inputs.reject) + +doc: | + + run picard liftover vcf + diff --git a/cwl/preprocess_liftover.cwl b/cwl/preprocess_liftover.cwl new file mode 100644 index 0000000..6c4c0f4 --- /dev/null +++ b/cwl/preprocess_liftover.cwl @@ -0,0 +1,49 @@ +#!/usr/bin/env cwl-runner + +cwlVersion: v1.0 + +class: CommandLineTool + +requirements: + - class: InlineJavascriptRequirement + +hints: + - class: DockerRequirement + dockerPull: ACCOUNT/sv_germline_granite:VERSION + +baseCommand: [python3, preprocess_liftover.py] + +inputs: + - id: vcf + type: File + inputBinding: + prefix: -i + + doc: expect the path to the input vcf + + - id: sample_names + type: string[] + inputBinding: + prefix: -s + + doc: list of sample IDs + + - id: output_vcf + type: string + default: "output.vcf" + inputBinding: + prefix: -o + + doc: base name of output vcf file + + +outputs: + - id: output + type: File + outputBinding: + glob: $(inputs.output_vcf) + +doc: | + + run preprocess_liftover.py to validate input VCF file for the liftover step + diff --git a/cwl/workflow_picard_liftover.cwl b/cwl/workflow_picard_liftover.cwl new file mode 100644 index 0000000..2c4d09e --- /dev/null +++ b/cwl/workflow_picard_liftover.cwl @@ -0,0 +1,66 @@ + +cwlVersion: v1.0 + +class: Workflow + +requirements: + MultipleInputFeatureRequirement: {} + +inputs: + - id: input_vcf + type: File + secondaryFiles: + - .tbi + doc: expect the path to the vcf gz file + + - id: chain + type: File + doc: the liftover chain file + + - id: reference + type: File + secondaryFiles: + - ^.dict + - .fai + doc: expect the path to the fa reference file + + - id: sample_names + type: string + doc: list of sample IDs + +outputs: + vcf_lifted: + type: File + outputSource: vcf_liftover/output + + reject: + type: File + outputSource: vcf_liftover/output_reject + +steps: + preprocess: + run: preprocess_liftover.cwl + in: + vcf: + source: input_vcf + sample_names: + source: sample_names + + out: [output] + + vcf_liftover: + run: picard_liftover.cwl + in: + vcf: + source: preprocess/output + chain: + source: chain + reference_sequence: + source: reference + + out: [output, output_reject] + + +doc: | + run preprocess_liftover | + run picard_liftover \ No newline at end of file diff --git a/dockerfiles/picard_liftover_vcf/Dockerfile b/dockerfiles/picard_liftover_vcf/Dockerfile new file mode 100644 index 0000000..751d752 --- /dev/null +++ b/dockerfiles/picard_liftover_vcf/Dockerfile @@ -0,0 +1,23 @@ +####################################################################### +# Basic image +####################################################################### +FROM cgap/cgap-ubuntu2004-py-38:0.0.1 +LABEL mainainers="Dominika Maziec (dominika.maziec@hms.harvard.edu), Michele Berselli (berselli.michele@gmail.com)" + +####################################################################### +# Setting working env +####################################################################### +WORKDIR /usr/local/bin + +####################################################################### +# Software +####################################################################### +## conda install +RUN conda install -c bioconda -y gatk4==4.2.6.1 picard==2.26.11 && \ + conda clean -a -y -f + + +COPY scripts/preprocess_liftover.py . +RUN chmod +x preprocess_liftover.py + +CMD ["bash"] \ No newline at end of file diff --git a/dockerfiles/picard_liftover_vcf/scripts/preprocess_liftover.py b/dockerfiles/picard_liftover_vcf/scripts/preprocess_liftover.py new file mode 100644 index 0000000..0d6eed6 --- /dev/null +++ b/dockerfiles/picard_liftover_vcf/scripts/preprocess_liftover.py @@ -0,0 +1,79 @@ +#!/usr/bin/env python3 + +################################################################################## +# +# Script to validate input VCF file for the liftover step. +# It runs the following steps: +# 1. Check if sample identifiers in the VCF matches provided sample names +# 2. Exlcude non standard chromosomes i.e GL000225.1 +# 3. If the VCF is not 'chr' based, add the prefix +# +################################################################################## + + +from granite.lib import vcf_parser +import argparse + +#list of standard chromosomes +std_chromosomes = list(map(str, list(range(1,23)))) + ["X", "Y"] + + + +################################################ +# Functions +################################################ + + +def main(args): + + output_file = args['outputfile'] + + vcf = vcf_parser.Vcf(args['inputfile']) + + + # 1. Check if sample names match genotype IDs + sample_names = args['sample_names'] + vcf_sample_names = vcf.header.IDs_genotypes + sample_names_err = f"Sample names {sample_names} do not match sample identifies in the VCF {vcf_sample_names}" + + if len(sample_names) != len(vcf_sample_names): + raise ValueError(sample_names_err) + else: + for id in vcf_sample_names: + if id not in args['sample_names']: + raise ValueError(sample_names_err) + + with open(output_file, "w") as output: + + vcf.write_header(output) + + for vnt in vcf.parse_variants(): + + # 2. Exclude non standard chromosomes + if vnt.CHROM in std_chromosomes: + + # 3. Add 'chr' to CHROM if not present + if vnt.CHROM.startswith("chr") == False: + vnt.CHROM = f"chr{vnt.CHROM}" + + vcf.write_variant(output, vnt) + + + + +################################################ +# Main +################################################ + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description='Converts genomic coordinates between two different assemblies using pyliftover.') + + parser.add_argument('-i','--inputfile', help='input VCF file', required=True) + parser.add_argument('-o','--outputfile', help='output VCF file', required=True) + parser.add_argument('-s', '--sample_names', help='list of sample IDs that must be present in the input VCF', nargs='+', required=True) + + + args = vars(parser.parse_args()) + + main(args) \ No newline at end of file From 5e82d252078fb63d8649d2f542b78650bc31e5b5 Mon Sep 17 00:00:00 2001 From: Dominika Maziec Date: Thu, 28 Jul 2022 17:51:24 +0000 Subject: [PATCH 02/21] . --- cwl/picard_liftover.cwl | 7 +++++-- cwl/preprocess_liftover.cwl | 6 +++--- cwl/workflow_picard_liftover.cwl | 4 +--- dockerfiles/picard_liftover_vcf/Dockerfile | 2 ++ 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/cwl/picard_liftover.cwl b/cwl/picard_liftover.cwl index 74837ec..4ebab65 100644 --- a/cwl/picard_liftover.cwl +++ b/cwl/picard_liftover.cwl @@ -9,7 +9,7 @@ requirements: hints: - class: DockerRequirement - dockerPull: ACCOUNT/picard_liftover_vcf:VERSION + dockerPull: picard_liftover:1.0.0 baseCommand: [gatk, LiftoverVcf] @@ -22,10 +22,13 @@ inputs: doc: expect the path to the input vcf - id: reference_sequence - type: string + type: File inputBinding: prefix: -R + secondaryFiles: + - ^.dict + - .fai doc: the reference sequence for the target genome build - id: reject diff --git a/cwl/preprocess_liftover.cwl b/cwl/preprocess_liftover.cwl index 6c4c0f4..a09164c 100644 --- a/cwl/preprocess_liftover.cwl +++ b/cwl/preprocess_liftover.cwl @@ -9,15 +9,15 @@ requirements: hints: - class: DockerRequirement - dockerPull: ACCOUNT/sv_germline_granite:VERSION + dockerPull: picard_liftover:1.0.0 -baseCommand: [python3, preprocess_liftover.py] +baseCommand: [python3, /usr/local/bin/preprocess_liftover.py] inputs: - id: vcf type: File inputBinding: - prefix: -i + prefix: -i doc: expect the path to the input vcf diff --git a/cwl/workflow_picard_liftover.cwl b/cwl/workflow_picard_liftover.cwl index 2c4d09e..52cbc5b 100644 --- a/cwl/workflow_picard_liftover.cwl +++ b/cwl/workflow_picard_liftover.cwl @@ -9,8 +9,6 @@ requirements: inputs: - id: input_vcf type: File - secondaryFiles: - - .tbi doc: expect the path to the vcf gz file - id: chain @@ -25,7 +23,7 @@ inputs: doc: expect the path to the fa reference file - id: sample_names - type: string + type: string[] doc: list of sample IDs outputs: diff --git a/dockerfiles/picard_liftover_vcf/Dockerfile b/dockerfiles/picard_liftover_vcf/Dockerfile index 751d752..733811a 100644 --- a/dockerfiles/picard_liftover_vcf/Dockerfile +++ b/dockerfiles/picard_liftover_vcf/Dockerfile @@ -16,6 +16,8 @@ WORKDIR /usr/local/bin RUN conda install -c bioconda -y gatk4==4.2.6.1 picard==2.26.11 && \ conda clean -a -y -f +## granite +RUN pip install granite-suite==0.2.0 COPY scripts/preprocess_liftover.py . RUN chmod +x preprocess_liftover.py From 3513a95eb8edbe0fd5585a4fdf7a9405aa4f2f57 Mon Sep 17 00:00:00 2001 From: Dominika Maziec Date: Mon, 1 Aug 2022 21:48:11 +0000 Subject: [PATCH 03/21] . --- ...{picard_liftover.cwl => gatk_liftover.cwl} | 8 +- cwl/preprocess_liftover.cwl | 3 +- ...iftover.cwl => workflow_gatk_liftover.cwl} | 4 +- .../Dockerfile | 2 +- .../scripts/preprocess_liftover.py | 5 +- .../files/liftover_vcf_chr_missing_in.vcf | 135 ++++++++++++ .../test/files/liftover_vcf_correct_out.vcf | 135 ++++++++++++ .../liftover_vcf_non_standard_chrom_in.vcf | 194 ++++++++++++++++++ .../files/liftover_vcf_two_samples_in.vcf | 135 ++++++++++++ .../scripts/test/test_preprocess_liftover.py | 94 +++++++++ portal_objects/file_reference.json | 14 +- portal_objects/software.json | 36 ++++ .../workflows/workflow_liftovervcf.json | 152 ++++++++++++++ 13 files changed, 906 insertions(+), 11 deletions(-) rename cwl/{picard_liftover.cwl => gatk_liftover.cwl} (89%) rename cwl/{workflow_picard_liftover.cwl => workflow_gatk_liftover.cwl} (95%) rename dockerfiles/{picard_liftover_vcf => gatk_liftover_vcf}/Dockerfile (87%) rename dockerfiles/{picard_liftover_vcf => gatk_liftover_vcf}/scripts/preprocess_liftover.py (97%) create mode 100644 dockerfiles/gatk_liftover_vcf/scripts/test/files/liftover_vcf_chr_missing_in.vcf create mode 100644 dockerfiles/gatk_liftover_vcf/scripts/test/files/liftover_vcf_correct_out.vcf create mode 100644 dockerfiles/gatk_liftover_vcf/scripts/test/files/liftover_vcf_non_standard_chrom_in.vcf create mode 100644 dockerfiles/gatk_liftover_vcf/scripts/test/files/liftover_vcf_two_samples_in.vcf create mode 100644 dockerfiles/gatk_liftover_vcf/scripts/test/test_preprocess_liftover.py create mode 100644 portal_objects/workflows/workflow_liftovervcf.json diff --git a/cwl/picard_liftover.cwl b/cwl/gatk_liftover.cwl similarity index 89% rename from cwl/picard_liftover.cwl rename to cwl/gatk_liftover.cwl index 4ebab65..6fbec25 100644 --- a/cwl/picard_liftover.cwl +++ b/cwl/gatk_liftover.cwl @@ -33,7 +33,7 @@ inputs: - id: reject type: string - default: "reject.vcf" + default: "reject.vcf.gz" inputBinding: prefix: --REJECT @@ -41,7 +41,7 @@ inputs: - id: output_vcf type: string - default: "output.vcf" + default: "output.vcf.gz" inputBinding: prefix: -O @@ -59,11 +59,15 @@ outputs: type: File outputBinding: glob: $(inputs.output_vcf) + secondaryFiles: + - .tbi - id: output_reject type: File outputBinding: glob: $(inputs.reject) + secondaryFiles: + - .tbi doc: | diff --git a/cwl/preprocess_liftover.cwl b/cwl/preprocess_liftover.cwl index a09164c..372b95a 100644 --- a/cwl/preprocess_liftover.cwl +++ b/cwl/preprocess_liftover.cwl @@ -9,7 +9,8 @@ requirements: hints: - class: DockerRequirement - dockerPull: picard_liftover:1.0.0 + dockerPull: ACCOUNT/gatk_liftover:VERSION + baseCommand: [python3, /usr/local/bin/preprocess_liftover.py] diff --git a/cwl/workflow_picard_liftover.cwl b/cwl/workflow_gatk_liftover.cwl similarity index 95% rename from cwl/workflow_picard_liftover.cwl rename to cwl/workflow_gatk_liftover.cwl index 52cbc5b..e7cfee6 100644 --- a/cwl/workflow_picard_liftover.cwl +++ b/cwl/workflow_gatk_liftover.cwl @@ -47,7 +47,7 @@ steps: out: [output] vcf_liftover: - run: picard_liftover.cwl + run: gatk_liftover.cwl in: vcf: source: preprocess/output @@ -61,4 +61,4 @@ steps: doc: | run preprocess_liftover | - run picard_liftover \ No newline at end of file + run gatk_liftover \ No newline at end of file diff --git a/dockerfiles/picard_liftover_vcf/Dockerfile b/dockerfiles/gatk_liftover_vcf/Dockerfile similarity index 87% rename from dockerfiles/picard_liftover_vcf/Dockerfile rename to dockerfiles/gatk_liftover_vcf/Dockerfile index 733811a..af3fe3c 100644 --- a/dockerfiles/picard_liftover_vcf/Dockerfile +++ b/dockerfiles/gatk_liftover_vcf/Dockerfile @@ -2,7 +2,7 @@ # Basic image ####################################################################### FROM cgap/cgap-ubuntu2004-py-38:0.0.1 -LABEL mainainers="Dominika Maziec (dominika.maziec@hms.harvard.edu), Michele Berselli (berselli.michele@gmail.com)" +LABEL mainainers="Michele Berselli (berselli.michele@gmail.com), Dominika Maziec (dominika.maziec@hms.harvard.edu)" ####################################################################### # Setting working env diff --git a/dockerfiles/picard_liftover_vcf/scripts/preprocess_liftover.py b/dockerfiles/gatk_liftover_vcf/scripts/preprocess_liftover.py similarity index 97% rename from dockerfiles/picard_liftover_vcf/scripts/preprocess_liftover.py rename to dockerfiles/gatk_liftover_vcf/scripts/preprocess_liftover.py index 0d6eed6..2d6964e 100644 --- a/dockerfiles/picard_liftover_vcf/scripts/preprocess_liftover.py +++ b/dockerfiles/gatk_liftover_vcf/scripts/preprocess_liftover.py @@ -18,7 +18,6 @@ std_chromosomes = list(map(str, list(range(1,23)))) + ["X", "Y"] - ################################################ # Functions ################################################ @@ -40,7 +39,7 @@ def main(args): raise ValueError(sample_names_err) else: for id in vcf_sample_names: - if id not in args['sample_names']: + if id not in sample_names: raise ValueError(sample_names_err) with open(output_file, "w") as output: @@ -58,8 +57,6 @@ def main(args): vcf.write_variant(output, vnt) - - ################################################ # Main diff --git a/dockerfiles/gatk_liftover_vcf/scripts/test/files/liftover_vcf_chr_missing_in.vcf b/dockerfiles/gatk_liftover_vcf/scripts/test/files/liftover_vcf_chr_missing_in.vcf new file mode 100644 index 0000000..22bd7be --- /dev/null +++ b/dockerfiles/gatk_liftover_vcf/scripts/test/files/liftover_vcf_chr_missing_in.vcf @@ -0,0 +1,135 @@ +##fileformat=VCFv4.1 +##fileDate=20220720 +##source=GenerateSVCandidates 1.6.0 +##reference=file:///home/ubuntu/ebs/hg19/hg19_broadinstitute/Homo_sapiens_assembly19.fasta +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT=0.999"> +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##ALT= +##ALT= +##ALT= +##ALT= +##cmdline=/miniconda/bin/configManta.py --referenceFasta /home/ubuntu/ebs/hg19/hg19_broadinstitute/Homo_sapiens_assembly19.fasta --bam=/home/ubuntu/ebs_temp/sorted.bam --runDir manta --callRegions /home/ubuntu/ebs/hg19/hg19_broadinstitute/human_hg19.bed.gz +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE1 +1 10613 MantaBND:5589:3:5:0:0:0:0 A A]12:95154] 26 PASS SVTYPE=BND;MATEID=MantaBND:5589:3:5:0:0:0:1;IMPRECISE;CIPOS=-181,181;BND_DEPTH=77;MATE_BND_DEPTH=124 GT:FT:GQ:PL:PR 0/1:PASS:26:76,0,186:13,5 +1 869444 MantaDEL:11298:0:1:0:0:0 AGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTCCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCTGCTCCGTTACAGGTGGGCGGGGGAGGCTGCTCCGTTACAGGTGGGCGGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTCCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTCCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCG A 322 SampleFT END=870284;SVTYPE=DEL;SVLEN=-840;CIGAR=1M840D;CIPOS=0,27;HOMLEN=27;HOMSEQ=GGGGAGGCGGCTGCGTTACAGGTGGGC GT:FT:GQ:PL:PR:SR 1/1:MinGQ:3:372,4,0:3,14:1,3 +1 1530481 MantaDEL:11359:0:0:1:5:0 TGACAGAGAGAGGCAGAGAGAGAGAGAGAGAGACAGACACAGAGAGAGCAGAACAGGGAGAAACAGAGAGACAGAGAGCGAGA T 602 MaxDepth END=1530563;SVTYPE=DEL;SVLEN=-82;CIGAR=1M82D;CIPOS=0,2;HOMLEN=2;HOMSEQ=GA GT:FT:GQ:PL:PR:SR 0/1:PASS:602:652,0,794:27,0:110,24 +1 1598413 MantaDEL:11364:0:0:0:0:0 ACACGCCTGTAATCCCAGCTACTTGGGAGGCTGAGGCAGGAGAATCACTTCAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCAAACCAGAGAAATCCAGCTCTGGGTGACAGAGCAAGACTCTGTTTCGGGAAAAATAAAATACATAGGCAGGGCGCGGTGGCT A 999 PASS END=1598580;SVTYPE=DEL;SVLEN=-167;CIGAR=1M167D;CIPOS=0,18;HOMLEN=18;HOMSEQ=CACGCCTGTAATCCCAGC GT:FT:GQ:PL:PR:SR 1/1:PASS:71:999,74,0:0,28:0,26 +1 1649485 MantaINS:11457:0:0:0:0:0 G GCTCTCATAGCCCTTCTGAACGGTCTGTGACACATGCATGCTTTCAGCTATT 887 MaxDepth END=1649485;SVTYPE=INS;SVLEN=51;CIGAR=1M51I;CIPOS=0,5;HOMLEN=5;HOMSEQ=CTCTC GT:FT:GQ:PL:PR:SR 1/1:PASS:82:940,85,0:1,0:0,34 +1 1649680 MantaDEL:11457:0:1:0:0:1 GCATGCTTTCAGGTGGAGTTTGCTCTCTCTGGTTTTCGGTCTGTGACACACGCATGCTTTCAGCTAGAGTATTCTCTCTATAGCCCCTCTGAACGGTCTGTGACACACGCATGCTTTCAGCTGGAGTTTGCTCTCTCTGGTTTTCGGTCTGTGACACACGCAGGCTTTTAGCTAGAGTTTGCTCTCCATAGCCCTTCTGAATGGTCTGTGACACACGCACGCTTTCAGCTAGAGTTTGCTCTCTCTGGTTTTCGGTCTGTGATACACGCACGCTTTCAGCTAGAGTTTGCTCTCTCTGGTTTTCGGTCTGTGACACACGCACGCTTTCAGCTAGAGTATTCTCTCTATAGCCATTCTGAACGGTCTGTGACGCACGTATGCTTTCAGCTAGAGTATTCTTTTTTTTTTTTTTGAGACGGAGTCTTGCTCTGTCGCCCAGGCTAGAGTGTGCAGTGGTGCGATAGCGGCTCACTGCAAGCTCCGCCTCCCAGGTTCATGCCATTCTCCTGCCTCAGCCTCCAGAGCAGCTGGGACTACAGGTGCCCGCCACCACGCCCGGCTAATTTTTTGTATTTTTAGTAGAGACTGGGTTTCACCGTGTTAGCCAGGATGGTCTTGATCTCCTGACCTTGTGATCCACCCGCCTCAGCCTCCCAAAGTGCTGGGATTACAGGCTTGAGCCACCACGCCCGGCCTTCAGCTAGAGTATTCTCTCTATAGCCCTTCTGAATGGTCTGTGACACACGCATGCTTTCAGCTAGAGTTTGCTCTCTCTATAGCCCTTCTGAATGGCCTGTGACACACGCATGCTTTCAGCTAGTTTGCTCTCTCTGGTTTTCGGTCTGTGACGCACACATGCTTTCAGCTAGAGTTTGCTCTCTATAGCCCCTCTGAATGGTCTGTGACACATGCATGCTTTCAGCTATTCTCTCTATAGCCCTTCTGAACGGTCTGTGACACCAT G 999 PASS END=1650642;SVTYPE=DEL;SVLEN=-962;CIGAR=1M962D GT:FT:GQ:PL:PR:SR 0/1:PASS:999:999,0,999:80,9:106,35 diff --git a/dockerfiles/gatk_liftover_vcf/scripts/test/files/liftover_vcf_correct_out.vcf b/dockerfiles/gatk_liftover_vcf/scripts/test/files/liftover_vcf_correct_out.vcf new file mode 100644 index 0000000..3e5fe75 --- /dev/null +++ b/dockerfiles/gatk_liftover_vcf/scripts/test/files/liftover_vcf_correct_out.vcf @@ -0,0 +1,135 @@ +##fileformat=VCFv4.1 +##fileDate=20220720 +##source=GenerateSVCandidates 1.6.0 +##reference=file:///home/ubuntu/ebs/hg19/hg19_broadinstitute/Homo_sapiens_assembly19.fasta +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT=0.999"> +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##ALT= +##ALT= +##ALT= +##ALT= +##cmdline=/miniconda/bin/configManta.py --referenceFasta /home/ubuntu/ebs/hg19/hg19_broadinstitute/Homo_sapiens_assembly19.fasta --bam=/home/ubuntu/ebs_temp/sorted.bam --runDir manta --callRegions /home/ubuntu/ebs/hg19/hg19_broadinstitute/human_hg19.bed.gz +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE1 +chr1 10613 MantaBND:5589:3:5:0:0:0:0 A A]12:95154] 26 PASS SVTYPE=BND;MATEID=MantaBND:5589:3:5:0:0:0:1;IMPRECISE;CIPOS=-181,181;BND_DEPTH=77;MATE_BND_DEPTH=124 GT:FT:GQ:PL:PR 0/1:PASS:26:76,0,186:13,5 +chr1 869444 MantaDEL:11298:0:1:0:0:0 AGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTCCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCTGCTCCGTTACAGGTGGGCGGGGGAGGCTGCTCCGTTACAGGTGGGCGGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTCCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTCCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCG A 322 SampleFT END=870284;SVTYPE=DEL;SVLEN=-840;CIGAR=1M840D;CIPOS=0,27;HOMLEN=27;HOMSEQ=GGGGAGGCGGCTGCGTTACAGGTGGGC GT:FT:GQ:PL:PR:SR 1/1:MinGQ:3:372,4,0:3,14:1,3 +chr1 1530481 MantaDEL:11359:0:0:1:5:0 TGACAGAGAGAGGCAGAGAGAGAGAGAGAGAGACAGACACAGAGAGAGCAGAACAGGGAGAAACAGAGAGACAGAGAGCGAGA T 602 MaxDepth END=1530563;SVTYPE=DEL;SVLEN=-82;CIGAR=1M82D;CIPOS=0,2;HOMLEN=2;HOMSEQ=GA GT:FT:GQ:PL:PR:SR 0/1:PASS:602:652,0,794:27,0:110,24 +chr1 1598413 MantaDEL:11364:0:0:0:0:0 ACACGCCTGTAATCCCAGCTACTTGGGAGGCTGAGGCAGGAGAATCACTTCAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCAAACCAGAGAAATCCAGCTCTGGGTGACAGAGCAAGACTCTGTTTCGGGAAAAATAAAATACATAGGCAGGGCGCGGTGGCT A 999 PASS END=1598580;SVTYPE=DEL;SVLEN=-167;CIGAR=1M167D;CIPOS=0,18;HOMLEN=18;HOMSEQ=CACGCCTGTAATCCCAGC GT:FT:GQ:PL:PR:SR 1/1:PASS:71:999,74,0:0,28:0,26 +chr1 1649485 MantaINS:11457:0:0:0:0:0 G GCTCTCATAGCCCTTCTGAACGGTCTGTGACACATGCATGCTTTCAGCTATT 887 MaxDepth END=1649485;SVTYPE=INS;SVLEN=51;CIGAR=1M51I;CIPOS=0,5;HOMLEN=5;HOMSEQ=CTCTC GT:FT:GQ:PL:PR:SR 1/1:PASS:82:940,85,0:1,0:0,34 +chr1 1649680 MantaDEL:11457:0:1:0:0:1 GCATGCTTTCAGGTGGAGTTTGCTCTCTCTGGTTTTCGGTCTGTGACACACGCATGCTTTCAGCTAGAGTATTCTCTCTATAGCCCCTCTGAACGGTCTGTGACACACGCATGCTTTCAGCTGGAGTTTGCTCTCTCTGGTTTTCGGTCTGTGACACACGCAGGCTTTTAGCTAGAGTTTGCTCTCCATAGCCCTTCTGAATGGTCTGTGACACACGCACGCTTTCAGCTAGAGTTTGCTCTCTCTGGTTTTCGGTCTGTGATACACGCACGCTTTCAGCTAGAGTTTGCTCTCTCTGGTTTTCGGTCTGTGACACACGCACGCTTTCAGCTAGAGTATTCTCTCTATAGCCATTCTGAACGGTCTGTGACGCACGTATGCTTTCAGCTAGAGTATTCTTTTTTTTTTTTTTGAGACGGAGTCTTGCTCTGTCGCCCAGGCTAGAGTGTGCAGTGGTGCGATAGCGGCTCACTGCAAGCTCCGCCTCCCAGGTTCATGCCATTCTCCTGCCTCAGCCTCCAGAGCAGCTGGGACTACAGGTGCCCGCCACCACGCCCGGCTAATTTTTTGTATTTTTAGTAGAGACTGGGTTTCACCGTGTTAGCCAGGATGGTCTTGATCTCCTGACCTTGTGATCCACCCGCCTCAGCCTCCCAAAGTGCTGGGATTACAGGCTTGAGCCACCACGCCCGGCCTTCAGCTAGAGTATTCTCTCTATAGCCCTTCTGAATGGTCTGTGACACACGCATGCTTTCAGCTAGAGTTTGCTCTCTCTATAGCCCTTCTGAATGGCCTGTGACACACGCATGCTTTCAGCTAGTTTGCTCTCTCTGGTTTTCGGTCTGTGACGCACACATGCTTTCAGCTAGAGTTTGCTCTCTATAGCCCCTCTGAATGGTCTGTGACACATGCATGCTTTCAGCTATTCTCTCTATAGCCCTTCTGAACGGTCTGTGACACCAT G 999 PASS END=1650642;SVTYPE=DEL;SVLEN=-962;CIGAR=1M962D GT:FT:GQ:PL:PR:SR 0/1:PASS:999:999,0,999:80,9:106,35 diff --git a/dockerfiles/gatk_liftover_vcf/scripts/test/files/liftover_vcf_non_standard_chrom_in.vcf b/dockerfiles/gatk_liftover_vcf/scripts/test/files/liftover_vcf_non_standard_chrom_in.vcf new file mode 100644 index 0000000..c28cdf2 --- /dev/null +++ b/dockerfiles/gatk_liftover_vcf/scripts/test/files/liftover_vcf_non_standard_chrom_in.vcf @@ -0,0 +1,194 @@ +##fileformat=VCFv4.1 +##fileDate=20220720 +##source=GenerateSVCandidates 1.6.0 +##reference=file:///home/ubuntu/ebs/hg19/hg19_broadinstitute/Homo_sapiens_assembly19.fasta +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT=0.999"> +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##ALT= +##ALT= +##ALT= +##ALT= +##cmdline=/miniconda/bin/configManta.py --referenceFasta /home/ubuntu/ebs/hg19/hg19_broadinstitute/Homo_sapiens_assembly19.fasta --bam=/home/ubuntu/ebs_temp/sorted.bam --runDir manta --callRegions /home/ubuntu/ebs/hg19/hg19_broadinstitute/human_hg19.bed.gz +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE1 +1 10613 MantaBND:5589:3:5:0:0:0:0 A A]12:95154] 26 PASS SVTYPE=BND;MATEID=MantaBND:5589:3:5:0:0:0:1;IMPRECISE;CIPOS=-181,181;BND_DEPTH=77;MATE_BND_DEPTH=124 GT:FT:GQ:PL:PR 0/1:PASS:26:76,0,186:13,5 +1 869444 MantaDEL:11298:0:1:0:0:0 AGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTCCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCTGCTCCGTTACAGGTGGGCGGGGGAGGCTGCTCCGTTACAGGTGGGCGGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTCCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTCCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCG A 322 SampleFT END=870284;SVTYPE=DEL;SVLEN=-840;CIGAR=1M840D;CIPOS=0,27;HOMLEN=27;HOMSEQ=GGGGAGGCGGCTGCGTTACAGGTGGGC GT:FT:GQ:PL:PR:SR 1/1:MinGQ:3:372,4,0:3,14:1,3 +1 1530481 MantaDEL:11359:0:0:1:5:0 TGACAGAGAGAGGCAGAGAGAGAGAGAGAGAGACAGACACAGAGAGAGCAGAACAGGGAGAAACAGAGAGACAGAGAGCGAGA T 602 MaxDepth END=1530563;SVTYPE=DEL;SVLEN=-82;CIGAR=1M82D;CIPOS=0,2;HOMLEN=2;HOMSEQ=GA GT:FT:GQ:PL:PR:SR 0/1:PASS:602:652,0,794:27,0:110,24 +1 1598413 MantaDEL:11364:0:0:0:0:0 ACACGCCTGTAATCCCAGCTACTTGGGAGGCTGAGGCAGGAGAATCACTTCAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCAAACCAGAGAAATCCAGCTCTGGGTGACAGAGCAAGACTCTGTTTCGGGAAAAATAAAATACATAGGCAGGGCGCGGTGGCT A 999 PASS END=1598580;SVTYPE=DEL;SVLEN=-167;CIGAR=1M167D;CIPOS=0,18;HOMLEN=18;HOMSEQ=CACGCCTGTAATCCCAGC GT:FT:GQ:PL:PR:SR 1/1:PASS:71:999,74,0:0,28:0,26 +1 1649485 MantaINS:11457:0:0:0:0:0 G GCTCTCATAGCCCTTCTGAACGGTCTGTGACACATGCATGCTTTCAGCTATT 887 MaxDepth END=1649485;SVTYPE=INS;SVLEN=51;CIGAR=1M51I;CIPOS=0,5;HOMLEN=5;HOMSEQ=CTCTC GT:FT:GQ:PL:PR:SR 1/1:PASS:82:940,85,0:1,0:0,34 +1 1649680 MantaDEL:11457:0:1:0:0:1 GCATGCTTTCAGGTGGAGTTTGCTCTCTCTGGTTTTCGGTCTGTGACACACGCATGCTTTCAGCTAGAGTATTCTCTCTATAGCCCCTCTGAACGGTCTGTGACACACGCATGCTTTCAGCTGGAGTTTGCTCTCTCTGGTTTTCGGTCTGTGACACACGCAGGCTTTTAGCTAGAGTTTGCTCTCCATAGCCCTTCTGAATGGTCTGTGACACACGCACGCTTTCAGCTAGAGTTTGCTCTCTCTGGTTTTCGGTCTGTGATACACGCACGCTTTCAGCTAGAGTTTGCTCTCTCTGGTTTTCGGTCTGTGACACACGCACGCTTTCAGCTAGAGTATTCTCTCTATAGCCATTCTGAACGGTCTGTGACGCACGTATGCTTTCAGCTAGAGTATTCTTTTTTTTTTTTTTGAGACGGAGTCTTGCTCTGTCGCCCAGGCTAGAGTGTGCAGTGGTGCGATAGCGGCTCACTGCAAGCTCCGCCTCCCAGGTTCATGCCATTCTCCTGCCTCAGCCTCCAGAGCAGCTGGGACTACAGGTGCCCGCCACCACGCCCGGCTAATTTTTTGTATTTTTAGTAGAGACTGGGTTTCACCGTGTTAGCCAGGATGGTCTTGATCTCCTGACCTTGTGATCCACCCGCCTCAGCCTCCCAAAGTGCTGGGATTACAGGCTTGAGCCACCACGCCCGGCCTTCAGCTAGAGTATTCTCTCTATAGCCCTTCTGAATGGTCTGTGACACACGCATGCTTTCAGCTAGAGTTTGCTCTCTCTATAGCCCTTCTGAATGGCCTGTGACACACGCATGCTTTCAGCTAGTTTGCTCTCTCTGGTTTTCGGTCTGTGACGCACACATGCTTTCAGCTAGAGTTTGCTCTCTATAGCCCCTCTGAATGGTCTGTGACACATGCATGCTTTCAGCTATTCTCTCTATAGCCCTTCTGAACGGTCTGTGACACCAT G 999 PASS END=1650642;SVTYPE=DEL;SVLEN=-962;CIGAR=1M962D GT:FT:GQ:PL:PR:SR 0/1:PASS:999:999,0,999:80,9:106,35 +GL000198.1 76768 MantaDEL:295:3:3:3:3:0 ACCACAAACAAGTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCGGGATCTCGGCTCACTGCAAGCTCCGCCTCCCGGGTTCACGCCATTCTCCTGCCTCAGCCTCCCAAGTAGCTGGGACTACAGGCGCCCGCCACTACGCCCGGCTAATTTTTTGTATTTTTTAGTAGAGACGGGGTTTCACCGTTTTAGCCGGGATGGTCTCGATCTCCTGACCTCGTGATCCGCCCGCCTCAGTCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGC A 299 MaxDepth END=77097;SVTYPE=DEL;SVLEN=-329;CIGAR=1M329D;CIPOS=0,15;HOMLEN=15;HOMSEQ=CCACAAACAAGTTTT GT:FT:GQ:PL:PR:SR 0/1:PASS:162:349,0,159:38,32:17,60 +GL000198.1 80167 MantaBND:561:0:7:0:0:0:1 G ]4:190809293]G 172 PASS SVTYPE=BND;MATEID=MantaBND:561:0:7:0:0:0:0;IMPRECISE;CIPOS=-165,166;BND_DEPTH=203;MATE_BND_DEPTH=42 GT:FT:GQ:PL:PR 0/1:PASS:172:222,0,999:125,30 +GL000198.1 86535 MantaBND:350:1:4:0:0:0:1 G G[4:190815411[ 999 MaxDepth SVTYPE=BND;MATEID=MantaBND:350:1:4:0:0:0:0;CIPOS=0,27;HOMLEN=27;HOMSEQ=TGAGTGGGATTACTGAATCATATGGTA;BND_DEPTH=188;MATE_BND_DEPTH=300 GT:FT:GQ:PL:PR:SR 0/1:PASS:999:999,0,999:55,53:32,22 +GL000208.1 15013 MantaDUP:TANDEM:3:993:1001:1:0:0 T 999 PASS END=29227;SVTYPE=DUP;SVLEN=14214;SVINSLEN=2;SVINSSEQ=AG GT:FT:GQ:PL:PR:SR 0/1:PASS:999:999,0,999:87,36:125,31 +GL000208.1 15267 MantaDEL:3:993:1001:2:1:0 C 999 PASS END=29480;SVTYPE=DEL;SVLEN=-14213;CIPOS=0,5;CIEND=0,5;HOMLEN=5;HOMSEQ=TGTCT GT:FT:GQ:PL:PR:SR 0/1:PASS:999:999,0,999:91,36:92,32 +GL000208.1 81516 MantaDEL:3:3753:3753:0:8:0 TCAAAACTGCTCTATCAAAGGAATGGTTCATCTCTCTGGGTTCAATGCACACATCACAAAGAAGTTTCTGAGAATGCTTCTGGCTAGTTTGTATGTGAAGATATTCCCACTTCCAAAAAAGGCTTCAAGGCGCTCCAAATATTCACCTGCAATTGTACAAAAGTGTGTTTCAAAACTGTTCTGTCAAAAGGAAGGTTCAACTCTGTGAGTTGAATGCACACTTCACAGAGATGTTTCTGAGAATGCTTCTTTCTAGTTTTTCTGTGAAGATATTTCCTTCTCCACCATAGCCCTCAATGCGCTCCAAATGTCCGCTGGCAGATTCCACAGAAACAGTGTTTCAAAACTGCTCTAACAAAAGAAAGGTCCAACTCCGTGATTTGAATGCACACATCACAAAGCAGTTTCTGTGAATCCTTCTGTCTAGTTTTTATATGAGGAGATTTCCTTTTCTACCACGGGCATCAAAGCGTTCCAAATATCCAATTGTAGATTGTA T 193 PASS END=82013;SVTYPE=DEL;SVLEN=-497;CIGAR=1M497D;CIPOS=0,5;HOMLEN=5;HOMSEQ=CAAAA GT:FT:GQ:PL:PR:SR 0/1:PASS:193:243,0,999:70,2:107,13 +GL000228.1 4871 MantaDEL:501:0:0:0:0:1 CAACCTTGTGACTTTGCATGGTTCCGCCCCCATAGCTGCTCTCATGGGCTGGCATTGTGTACCTCTGGCTGGTATTGTGTGTCTCTGGCTTTTCCAGGTGCAAGGTGCAAGCTGTAGCTAGATCTACCATTATGCGGT CTAC 319 MaxDepth;MaxMQ0Frac END=5008;SVTYPE=DEL;SVLEN=-137;CIGAR=1M3I137D GT:FT:GQ:PL:PR:SR 0/1:PASS:319:369,0,477:0,2:37,14 +GL000214.1 52763 MantaINS:1061:0:0:1:0:0 C CTTTCAATGCTGCTGCAAAGGCTTCCTTATATTCTTCTAACTCAGTTGTAACCT 430 PASS END=52763;SVTYPE=INS;SVLEN=53;CIGAR=1M53I;CIPOS=0,53;HOMLEN=53;HOMSEQ=TTTCAATGCTGCTGCAAAGGCTTCCTTATATTCTTCTAACTCAGTTGTAACCT GT:FT:GQ:PL:PR:SR 0/1:PASS:173:480,0,170:0,0:16,11 +GL000214.1 84837 MantaDEL:1010:0:1:0:0:0 GCTTTCCTCTAGGTAAAGATCAGAACTCCAACTAGCACTTAACTCACTGGAAATATCTTAAGAGTCTCAAAATTCACTGCTTTGAATCCCTGACAAGTATAAAAATTTTATACTGAAAACTTCATGCTATTCAAAACATTAAAACAGAAACATCTGACTTAAAGCTTACATTTTTAAAATCTTTTTTATGCTTCTAAATTTGTTTTTATTCAAATATGGATACCAACAATAACATTTATGTCAATGCCTTCTGTTCAATATTGAACAAATAGAATTAGGAATAAGAATAATATGAGTACATCCAATCATTGAATGTACTTTATTTCCAGTATTACATCAAATGTACCTGCTCTCAATGTCTGTACTTTCTTTCTTTGTACTGCTCCTTTCACAGCAGGATCTTCCACTTCAGTGCTAGGCTGAATGGGTTTTAAAAGAAAACGATTCATAAATCATATATATTTTATACAACATGGAGTTAGTGATTCAAAAATATACATAATTAATTATCTTCAAGGAAGGATGTTTTGCAGGAGGCCCTACAAAGCAAAGGGGATATGTCATCAATTATATGTAAGTATGACAGGACCAACCAAACATTCATGCAGTGTTACTGTCGAGCTGAATTCTCAGGCCTGGCTATAAAAATATTTACTTAAGGTTTTGAGGGTTCTTCTTGGCTTCGTCTTTTCATTGCCTAGGACAGCAACATGACAGAAACACAATGAGGAAAATAGGAATATAGGATTCCCAAAATGCACAGTTTACATTTCAGTAGTGAGATTATGTTTCAAATGCCTATACTTAAAATAGAAAAGCATTGATATAACCGTGAACACGTGGACTAATGAGGAGAAAAGGGACCATTAAACAGAGGGGCAAATCAAACCTGAGAGAATCAATGTCAAAGCTGATGGTGAATGTACAGAGTATTTTAA G 999 PASS END=85774;SVTYPE=DEL;SVLEN=-937;CIGAR=1M937D;CIPOS=0,2;HOMLEN=2;HOMSEQ=CT GT:FT:GQ:PL:PR:SR 0/1:PASS:999:999,0,999:152,5:159,60 +GL000218.1 51028 MantaDEL:176149:0:0:0:0:0 ATTTATGCTAATTAAACCAGAAAGTTTCTGTAGGTAATTCAGCTCTGCGCACCATTTAATAGGGTTATGCCAAACAGAACCAATGGTTATATAATACCCAGAATATAAACCTCAGCAAATTCATACTTTTGGAATGGCAACACAAGTGAGAATGAGAGGCAATATTTCATTTCAAAGTACCATAGGCTGCTAAGAGTGTTCTTTTCTTTTAAAAATTTGTGTGGTTGTTAG A 285 MaxMQ0Frac END=51258;SVTYPE=DEL;SVLEN=-230;CIGAR=1M230D;CIPOS=0,1;HOMLEN=1;HOMSEQ=T GT:FT:GQ:PL:PR:SR 0/1:PASS:285:335,0,999:97,3:144,16 +GL000220.1 45590 MantaDEL:927:0:0:0:0:0 TTTGACTTGTTTTATTGTAAAACAAGGCTAATATACAGTATGTGAAAGTTTCCATAAAATTC T 999 PASS END=45651;SVTYPE=DEL;SVLEN=-61;CIGAR=1M61D;CIPOS=0,2;HOMLEN=2;HOMSEQ=TT GT:FT:GQ:PL:PR:SR 0/1:PASS:999:999,0,999:64,0:181,82 +GL000211.1 45888 MantaDEL:117396:0:0:0:0:0 CGAGGTGCACCCTGTGTTCACACCAGGGACGCCAGTGTCCCCAGGGCCCAGCACAGGGGCTCATCGGAAGGCACTTTCTTCCGTGGGGGACCCAGGCCCCGCTTCTAGGCGGAGCGGTTTTTAATTTTTTTCTCTGCCCCAGGTGTCTCACCTTCCCGTCATGGGCCTTCTGCCCGCCTTGGGGTACCCCTAGCAAGCT C 105 PASS END=46086;SVTYPE=DEL;SVLEN=-198;CIGAR=1M198D;CIPOS=0,4;HOMLEN=4;HOMSEQ=GAGG GT:FT:GQ:PL:PR:SR 0/1:PASS:105:155,0,886:32,9:65,7 +GL000199.1 32551 MantaDEL:3:2276:8026:0:0:0 C 311 PASS END=34266;SVTYPE=DEL;SVLEN=-1715;IMPRECISE;CIPOS=-87,88;CIEND=-84,84 GT:FT:GQ:PL:PR 0/1:PASS:311:361,0,341:26,20 +GL000199.1 62034 MantaDEL:109965:1:1:0:4:0 GAGAAGTTTCTGAGAATGCTTCTGTCTTGATTTTATATGAAGATATTCCCGTTTCCAACGAGACCTTCAAAGCTATCCAAATATCCACTTGCAGATTCTACAAAAAGAGTGTTTCCAAAATGTTGTATCAAAACAAAGGTTCAACTCTGTTAGTTGAGGACACACATCGCAAATAAGTTTCTGAGAATGCTTCTGTCTAGTTTTTATTTGAAGATATTTCCTTTCTTACCATAGGCCTGAAAGCGCTTGAAATGTCCGTTTGCAGATACTACAGAAAGAGTGTTTCAAACATGCTCTATGAAAGGGAATGTTCAGTTCTGTGACGTGAATGCAAACATCACAAAGAAGTTCCTGAGAATGCTTCTCTCTAGATTTTATATGTAATCCCGTTTCCAACGAAATCCTCAAAGCTATCCAAACATCCACTTTCAGATTCCACAAAAAGAGTGTTTCAAAACTGTTCTGTAAAAAGAAAGGTTCATCTCTGTTAGTTGAATACACACATCACAA G 156 MaxDepth;MaxMQ0Frac;NoPairSupport END=62543;SVTYPE=DEL;SVLEN=-509;CIGAR=1M509D;CIPOS=0,26;HOMLEN=26;HOMSEQ=AGAAGTTTCTGAGAATGCTTCTGTCT GT:FT:GQ:PL:PR:SR 0/1:PASS:156:206,0,999:3,0:569,26 +GL000199.1 95746 MantaDEL:3:11211:11231:0:0:0 T 92 MaxDepth;MaxMQ0Frac END=96500;SVTYPE=DEL;SVLEN=-754;IMPRECISE;CIPOS=-87,88;CIEND=-78,78 GT:FT:GQ:PL:PR 0/1:PASS:92:142,0,250:18,9 +GL000199.1 140869 MantaDEL:3:11274:11277:0:0:0 T 692 MaxDepth END=142832;SVTYPE=DEL;SVLEN=-1963;IMPRECISE;CIPOS=-86,86;CIEND=-98,98 GT:FT:GQ:PL:PR 0/1:PASS:390:742,0,387:32,40 +GL000199.1 158220 MantaDUP:TANDEM:3:11210:11210:9:0:0 T 351 PASS END=159624;SVTYPE=DUP;SVLEN=1404;IMPRECISE;CIPOS=-131,131;CIEND=-126,127 GT:FT:GQ:PL:PR 0/1:PASS:351:401,0,848:60,37 +GL000217.1 42384 MantaBND:5625:0:1:0:0:0:1 T [21:9551401[T 174 PASS SVTYPE=BND;MATEID=MantaBND:5625:0:1:0:0:0:0;IMPRECISE;CIPOS=-313,313;BND_DEPTH=90;MATE_BND_DEPTH=75 GT:FT:GQ:PL:PR 0/1:PASS:47:224,0,44:5,12 +GL000217.1 109405 MantaDEL:3:2064:2064:0:1:0 AAAAGCCGTGGCAGGGAGGGGCAAAAAGCCGTGGTGGGCAAAAAGCCGCGGCAGCTGGGGGCGAAAAGCGGCGGCGGGTAAAAAGCCGCGGCGGCGGGGGGGGCAAAAAGCCGCGGCGGGCAAGAAGCCGCGGCGGCAAAAAGCCACGGCGCGGGGACGAAA AG 62 PASS END=109566;SVTYPE=DEL;SVLEN=-161;CIGAR=1M1I161D GT:FT:GQ:PL:PR:SR 0/1:PASS:62:112,0,118:17,20:31,7 +GL000217.1 151317 MantaBND:3:17933:17940:0:0:0:0 A [21:9442633[A 17 MinQUAL SVTYPE=BND;MATEID=MantaBND:3:17933:17940:0:0:0:1;IMPRECISE;CIPOS=-117,118;BND_DEPTH=127;MATE_BND_DEPTH=128 GT:FT:GQ:PL:PR 0/1:PASS:17:67,0,611:41,11 +GL000217.1 155442 MantaDEL:3:18182:18182:0:2:5 GCCGCGGCGGCGGAGCCAAAAAGCCTCGGCGGCAAAAACCCGCGGTGGCGGGGGCAAAAAGACGCGGCGGCCAAAAGCCGCGGCGGCGGGGAGTAAAAAGCCGCGTCGGCAAAAGCCGCGGCGAGGGGGGTAAAAAGCCGCGTTGGCAAAAA G 69 PASS END=155593;SVTYPE=DEL;SVLEN=-151;CIGAR=1M151D;CIPOS=0,12;HOMLEN=12;HOMSEQ=CCGCGGCGGCGG GT:FT:GQ:PL:PR:SR 0/1:PASS:55:119,0,53:27,3:11,4 +GL000216.1 116789 MantaINS:1181:0:0:0:8:0 A 133 PASS END=116808;SVTYPE=INS;LEFT_SVINSSEQ=TATATTTATATATACATTTATATATACTTTACTATGATAGACAT;RIGHT_SVINSSEQ=ATATATACACATATATACACACACACACATATACATATACATATATATACAT GT:FT:GQ:PL:PR:SR 0/1:PASS:133:183,0,439:2,0:25,7 +GL000216.1 148942 MantaDEL:1248:0:0:0:1:0 ATATCACCTGGGAGATCAGTGCAAAGGTATATCACAAGGCCCTATGTAGGCAAAGCCTAGACAGTAGTTACATCAGTTGGGTGATCAGTGGCGAGATCTCTCACAATTCCCCTGTAGGCAGAGCTTATATAACAGTTAC AA 305 MaxDepth;MaxMQ0Frac END=149080;SVTYPE=DEL;SVLEN=-138;CIGAR=1M1I138D GT:FT:GQ:PL:PR:SR 0/1:PASS:305:355,0,689:54,6:209,13 +GL000205.1 64758 MantaDEL:412:0:0:0:1:0 CTTTCTTTCTTTCTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTCTTTCTTTCTTTCTTCTTTCTTTCTTTCTTTCT C 999 PASS END=64844;SVTYPE=DEL;SVLEN=-86;CIGAR=1M86D GT:FT:GQ:PL:PR:SR 0/1:PASS:152:999,0,149:30,0:66,24 +GL000205.1 89982 MantaDEL:456:0:0:0:1:0 TGTGCTGGACACCTTGGTGGTTCCACTTGTCCCATTGTGATGCCCATGGCCTCCTGGACTTTAGTACATGTTCTGACGTTGCAAGATTCCGTCGGCACCATGGGAGCCGCTTCCTCTACTGTCATTGAAACACCCCTGGGATGTATATTTAAAAATTGAAACAGCTTTTGGCTAGATGAACCAAAAAAAAAAAAAAGAAAAGAAAAAACTTATCTTCTTTTGTAATACTATTTAGCTTGCATACAGATTAGCTCACAAAACATGGCTGGGGAATGAGACTGTGAACTTTAACACCCTCCTACAGCTAGATCTTTTCTGTAGAAATCAGGGAAAATGGTCTGAAATATCCTATGTGCAAACCTTTATGGCCTGACAACAAAACCCAGCTCTATGCAGCACCTGTGGGCTAAAGCCTAGTAAGCCACAAAGCCCCTCAGAAGCATTAGAAGATCATCTCTTATTAAGGGGAAGGGACCCCAGACCCAACAGCCCAA T 416 PASS END=90475;SVTYPE=DEL;SVLEN=-493;CIGAR=1M493D GT:FT:GQ:PL:PR:SR 0/1:PASS:416:466,0,999:221,17:244,23 +GL000205.1 94604 MantaBND:448:0:1:1:0:0:0 C [GL000195.1:24097[C 24 PASS SVTYPE=BND;MATEID=MantaBND:448:0:1:1:0:0:1;IMPRECISE;CIPOS=-212,213;BND_DEPTH=305;MATE_BND_DEPTH=395 GT:FT:GQ:PL:PR 0/1:PASS:24:74,0,999:253,44 +GL000205.1 103024 MantaBND:429:0:1:1:0:0:1 A [GL000195.1:16560[A 126 PASS SVTYPE=BND;MATEID=MantaBND:429:0:1:1:0:0:0;IMPRECISE;CIPOS=-141,142;BND_DEPTH=205;MATE_BND_DEPTH=289 GT:FT:GQ:PL:PR 0/1:PASS:126:176,0,999:296,56 +GL000205.1 110471 MantaINS:436:0:0:0:0:0 C CGAAAGTGCCTTCCCATCAGCCCCAGCGCATGGCCCCGGGACCCTGACATCTCTGGTTTGAACCCAGGGTGCGTCTCGGGCCCGATAGGGGTACCCCAAAGCGAGCAGAAGGCCCCTGAGGGGGAAGACTAGGTTTGAGGGAGGGGAGGGGAGGCAATTGTGGCAGGAAAAAAAACAAAAAACAAAAAACACAGCGCCGTCAAGAAGCGGGCCTGTGTCCATCACATAA 999 PASS END=110471;SVTYPE=INS;SVLEN=228;CIGAR=1M228I;CIPOS=0,23;HOMLEN=23;HOMSEQ=GAAAGTGCCTTCCCATCAGCCCC GT:FT:GQ:PL:PR:SR 0/1:PASS:679:999,0,676:68,3:14,179 +GL000205.1 142318 MantaDEL:453:0:0:0:0:0 CTCCCTCACATAGGATTCCAAAAGACTGCTACGAGGTTCTGAATTATTCTCCATCACATGGGATTCCAGAACACCCCTGCTGTCTTCTGAATGTTTCTCCCTCACATAGTGTTCCATAACACTACTGCTGGGTTCTGAGAGTTTT C 317 PASS END=142462;SVTYPE=DEL;SVLEN=-144;CIGAR=1M144D;CIPOS=0,19;HOMLEN=19;HOMSEQ=TCCCTCACATAGGATTCCA GT:FT:GQ:PL:PR:SR 0/1:PASS:317:367,0,999:74,32:88,24 +GL000205.1 144054 MantaDUP:TANDEM:3:91:91:2:2:0 G 146 PASS END=144150;SVTYPE=DUP;SVLEN=96;CIPOS=0,11;CIEND=0,11;HOMLEN=11;HOMSEQ=GGATTCCAGAA GT:FT:GQ:PL:PR:SR 0/1:PASS:146:196,0,999:93,0:180,16 +GL000205.1 148170 MantaBND:3:113:150:0:1:0:0 C [1:142653895[GCAC 324 MaxDepth;NoPairSupport SVTYPE=BND;MATEID=MantaBND:3:113:150:0:1:0:1;SVINSLEN=3;SVINSSEQ=GCA;BND_DEPTH=188;MATE_BND_DEPTH=283 GT:FT:GQ:PL:PR:SR 0/1:PASS:324:374,0,999:176,0:208,27 +GL000205.1 149555 MantaDEL:464:0:1:0:0:0 T 449 PASS END=157324;SVTYPE=DEL;SVLEN=-7769;CIPOS=0,6;CIEND=0,6;HOMLEN=6;HOMSEQ=ATAGGA GT:FT:GQ:PL:PR:SR 0/1:PASS:449:499,0,999:135,18:123,18 +GL000205.1 153589 MantaDEL:3:55:152:0:0:0 A 283 PASS END=154961;SVTYPE=DEL;SVLEN=-1372;IMPRECISE;CIPOS=-203,204;CIEND=-219,220 GT:FT:GQ:PL:PR 0/1:PASS:283:333,0,999:116,38 +GL000219.1 55210 MantaDEL:391:0:0:0:2:0 AACAGGCTCTCATAGTCACCATCATCACAGGAATCTTGCGCCTGTTACAGGGAAGCACTGTCCTACATCGTATGTG A 999 PASS END=55285;SVTYPE=DEL;SVLEN=-75;CIGAR=1M75D;CIPOS=0,26;HOMLEN=26;HOMSEQ=ACAGGCTCTCATAGTCACCATCATCA GT:FT:GQ:PL:PR:SR 1/1:PASS:65:999,68,0:29,0:24,39 +GL000219.1 111132 MantaDEL:226:0:3:0:0:0 T 999 PASS END=114246;SVTYPE=DEL;SVLEN=-3114;CIPOS=0,10;CIEND=0,10;HOMLEN=10;HOMSEQ=TTACCTTTTC GT:FT:GQ:PL:PR:SR 1/1:PASS:176:999,179,0:0,40:0,22 +GL000219.1 120545 MantaDEL:366:0:0:0:1:0 CCCGGCAGGGCCAGCGCAGGGGTTCTGCAGGGCCGTCCTGTGGCTCTGATATCTATGAATGGGCGGCCAGCAAAGGCACAGGCACCAGCAGAAAGGAGCAGAGACGTCCAGAACATGCTGTCCCCCAATGCCAGCCCCAAGCGAGGCCAGCTTAGGGTGCGGTGGGCAGAGCCATCCTGCAGCTCTGAGATCCACGAGGTGGAGGTGGAAGTAGTGCCAGGCTCCACTGCATTGGAGCTCGCACAATTTGAGCCCAATTTGATTCCGGAGCCATGACCTGT CTGC 749 MaxDepth END=120825;SVTYPE=DEL;SVLEN=-280;CIGAR=1M3I280D GT:FT:GQ:PL:PR:SR 0/1:PASS:408:799,0,404:174,3:394,19 +GL000219.1 121166 MantaBND:310:1:2:0:0:0:0 T T]20:29595124] 675 MaxDepth SVTYPE=BND;MATEID=MantaBND:310:1:2:0:0:0:1;CIPOS=0,1;HOMLEN=1;HOMSEQ=G;BND_DEPTH=349;MATE_BND_DEPTH=242 GT:FT:GQ:PL:PR:SR 0/1:PASS:675:725,0,999:306,4:249,37 +GL000219.1 122613 MantaBND:317:0:1:0:0:0:0 A A]20:29594562] 246 MaxDepth SVTYPE=BND;MATEID=MantaBND:317:0:1:0:0:0:1;IMPRECISE;CIPOS=-200,200;BND_DEPTH=352;MATE_BND_DEPTH=129 GT:FT:GQ:PL:PR 0/1:PASS:246:296,0,999:86,25 +GL000219.1 123128 MantaBND:326:0:1:0:0:0:1 T [4:190835845[T 83 PASS SVTYPE=BND;MATEID=MantaBND:326:0:1:0:0:0:0;IMPRECISE;CIPOS=-132,133;BND_DEPTH=112;MATE_BND_DEPTH=49 GT:FT:GQ:PL:PR 0/1:PASS:83:133,0,999:132,31 +GL000219.1 159372 MantaBND:322:0:1:0:0:0:0 T T]GL000198.1:69848] 999 MaxDepth SVTYPE=BND;MATEID=MantaBND:322:0:1:0:0:0:1;BND_DEPTH=166;MATE_BND_DEPTH=341 GT:FT:GQ:PL:PR:SR 0/1:PASS:999:999,0,999:61,14:66,48 +GL000224.1 42781 MantaDEL:3:43:47:0:0:0 C 607 PASS END=157865;SVTYPE=DEL;SVLEN=-115084;IMPRECISE;CIPOS=-186,187;CIEND=-234,235 GT:FT:GQ:PL:PR 0/1:PASS:607:657,0,999:82,46 +GL000224.1 73529 MantaDEL:39:0:0:0:0:0 ATTCCCCACCGCAGAAAAGCCTTCTACTACTGACTGGACTGGAGAATACATGCCTCAGTTCCGTTGTTCCTTGGCTGTGTAACTCTGAGGTCCAT AA 917 PASS END=73623;SVTYPE=DEL;SVLEN=-94;CIGAR=1M1I94D GT:FT:GQ:PL:PR:SR 0/1:PASS:772:967,0,769:36,10:78,51 +GL000195.1 4607 MantaDEL:110119:0:1:0:0:0 C 999 PASS END=6132;SVTYPE=DEL;SVLEN=-1525;CIPOS=0,2;CIEND=0,2;HOMLEN=2;HOMSEQ=AT GT:FT:GQ:PL:PR:SR 0/1:PASS:999:999,0,999:121,101:114,78 +GL000195.1 16560 MantaBND:429:0:1:1:0:0:0 A [GL000205.1:103024[A 126 PASS SVTYPE=BND;MATEID=MantaBND:429:0:1:1:0:0:1;IMPRECISE;CIPOS=-185,185;BND_DEPTH=289;MATE_BND_DEPTH=205 GT:FT:GQ:PL:PR 0/1:PASS:126:176,0,999:296,56 +GL000195.1 24097 MantaBND:448:0:1:1:0:0:1 T [GL000205.1:94604[T 24 PASS SVTYPE=BND;MATEID=MantaBND:448:0:1:1:0:0:0;IMPRECISE;CIPOS=-203,204;BND_DEPTH=395;MATE_BND_DEPTH=305 GT:FT:GQ:PL:PR 0/1:PASS:24:74,0,999:253,44 +GL000195.1 55076 MantaDEL:7240:11:11:1:0:0 TAAGATAAGACAGTGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGTGGATCATGAGGTCAGGAGATCGAGACCATCCTGGCTAACAAGGTGAAACCCCGTCTCTACTAAAAATACAAAAAATTAGCCGGGTTCGGTGGCGGGCGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATGGCGTGAACCCGGGAAGCGGAGCTTGCCGTGAGCCGAGATTGCGCCACTGCAGTCCGCAGTCCGGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAAAAAAAAAAAAAAAAAA T 125 PASS END=55399;SVTYPE=DEL;SVLEN=-323;CIGAR=1M323D;CIPOS=0,11;HOMLEN=11;HOMSEQ=AAGATAAGACA GT:FT:GQ:PL:PR:SR 0/1:PASS:125:175,0,999:117,9:110,15 +GL000195.1 56177 MantaINS:110198:0:0:0:0:0 T TTCAAATCAAATTTGTTTTTTTGTTTTTTTGTTTTTTTTTTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCTGGACTGCGGACTGCAGTGGCGCAATCTCTGCTCACTGCAAGCTCCGCTTCCCGGGTTCACGCCATTCTCCTGCCTCAGCCTCCCAAGTAGCTGGGACTACAGGCGCCCGCCACCGCGCCCGGCTAATTTTTTGTATTTTTAGTAGAGACGGGGTTTCACCTTGTTAGCCAGGATGGTCTCGATCTCCTGACCTCATGATCCACCCGCCTCGGCCTCCCAAAGTGCTCGGATTACAGGCGTGAGCCACCGCGCCTGGCC 496 PASS END=56177;SVTYPE=INS;SVLEN=330;CIGAR=1M330I;CIPOS=0,18;HOMLEN=18;HOMSEQ=TCAAATCAAATTTGTTTT GT:FT:GQ:PL:PR:SR 0/1:PASS:496:546,0,999:141,23:108,17 +GL000195.1 71292 MantaDEL:110169:0:0:0:0:1 ACGTTTCTTCTGCATAAGCAAAAGGTACAAAAATTAAAATACTTAATGTGATGGTTA ATTAAGCT 999 PASS END=71348;SVTYPE=DEL;SVLEN=-56;CIGAR=1M7I56D GT:FT:GQ:PL:PR:SR 0/1:PASS:999:999,0,999:34,0:115,73 +GL000222.1 60338 MantaDEL:117338:0:0:0:0:0 CCAATAGCACAGTAGTTTATGCAAACACGTAGAGGTAGTGCCTTGGTGGCCTTGGATTAACATCAAGAAAAATTCTCTTGATTTATCAAGTAGAGACTCTTGTTCTCTTCCCTTACTTTCTTTTAAATCA CG 587 MaxDepth END=60467;SVTYPE=DEL;SVLEN=-129;CIGAR=1M1I129D GT:FT:GQ:PL:PR:SR 0/1:PASS:587:637,0,808:62,18:119,18 +GL000222.1 61283 MantaDEL:117344:0:0:0:4:0 GGTACAGGGGTTCTTTAGTCAGCAGGTGATGAATTCTGCCAGGTCTTTACTGACATGGCAGCACTGAGTTTAATGTAAAGTCCTCCAGTCCT GTTCCTCTAATCAC 60 MaxDepth END=61374;SVTYPE=DEL;SVLEN=-91;CIGAR=1M13I91D GT:FT:GQ:PL:PR:SR 0/1:PASS:60:110,0,282:55,0:50,8 +GL000193.1 68524 MantaBND:857:0:1:0:0:0:1 G G]4:190479009] 222 PASS SVTYPE=BND;MATEID=MantaBND:857:0:1:0:0:0:0;IMPRECISE;CIPOS=-150,151;BND_DEPTH=118;MATE_BND_DEPTH=49 GT:FT:GQ:PL:PR 0/1:PASS:222:272,0,999:74,28 +GL000194.1 47237 MantaDEL:1077:0:0:0:0:0 TTGGACTTCATCAAAATTAAACATTTTTGCACTTCAAAGGACACCGTCAAGAAAGTGAAAAGACAACTCACAAGA T 289 MaxMQ0Frac END=47311;SVTYPE=DEL;SVLEN=-74;CIGAR=1M74D;CIPOS=0,4;HOMLEN=4;HOMSEQ=TGGA GT:FT:GQ:PL:PR:SR 0/1:PASS:180:339,0,177:0,0:26,8 +GL000194.1 82300 MantaDEL:1105:0:0:0:0:0 ACAGAACACTGCTGCTGGAGTCTGAATGTTTGTCAGTCACATAGAATTCCAGAACACTGCTACAAGGGTGTCAATATTTCTCCCTCACCTAGTATTC A 416 PASS END=82396;SVTYPE=DEL;SVLEN=-96;CIGAR=1M96D;CIPOS=0,10;HOMLEN=10;HOMSEQ=CAGAACACTG GT:FT:GQ:PL:PR:SR 0/1:PASS:416:466,0,717:13,9:64,15 +GL000225.1 4921 MantaDEL:3:19821:19821:3:0:0 TGATCAGTGCAGAGATATGTCACAATGTCCCTGTAGGCAGAGCATAGAGAAGTGTTGTATCACCTGAGC T 309 PASS END=4989;SVTYPE=DEL;SVLEN=-68;CIGAR=1M68D;CIPOS=0,26;HOMLEN=26;HOMSEQ=GATCAGTGCAGAGATATGTCACAATG GT:FT:GQ:PL:PR:SR 0/1:PASS:309:359,0,415:22,0:40,11 +GL000225.1 24224 MantaDEL:533:0:0:0:0:0 CCTGATCACCCAGGAGATGTAAATCTTGGCTAGGCTCGGCCTACAGGGGCTTTGTGACATATTTCTCCA C 999 PASS END=24292;SVTYPE=DEL;SVLEN=-68;CIGAR=1M68D;CIPOS=0,10;HOMLEN=10;HOMSEQ=CTGATCACCC GT:FT:GQ:PL:PR:SR 0/1:PASS:999:999,0,999:36,0:132,31 +GL000225.1 71951 MantaDEL:3:19861:19861:0:1:0 ACCACGCATCCTAAAACTCCTCCTGCAGAGGCCGGTATTCTTCCTCCCTGATTGGTGATTCCAGCGAGGTGGCCTCTTCCAAGGCCTCCAG A 316 PASS END=72041;SVTYPE=DEL;SVLEN=-90;CIGAR=1M90D;CIPOS=0,1;HOMLEN=1;HOMSEQ=C GT:FT:GQ:PL:PR:SR 0/1:PASS:316:366,0,906:48,0:136,13 +GL000225.1 76478 MantaDEL:3:19878:19878:0:3:0 GATCCCCCAGGTGATGCAACTATTCTCTATGGTCTGCCTACAGGGGAAATTGTGACATAACTCTGCACTT G 100 PASS END=76547;SVTYPE=DEL;SVLEN=-69;CIGAR=1M69D;CIPOS=0,3;HOMLEN=3;HOMSEQ=ATC GT:FT:GQ:PL:PR:SR 0/1:PASS:100:150,0,999:43,0:105,9 +GL000225.1 143516 MantaDEL:1227:0:0:0:1:0 GAGCATTTTGACATATCTCTGCACTGATCACTGAGATGATGTAACTCTTCTCTGGGCTTTGCCTACAGGA G 208 PASS END=143585;SVTYPE=DEL;SVLEN=-69;CIGAR=1M69D GT:FT:GQ:PL:PR:SR 0/1:PASS:208:258,0,999:27,0:118,12 +NC_007605 139719 MantaDEL:110055:0:1:0:0:0 G 999 PASS END=151550;SVTYPE=DEL;SVLEN=-11831;CIPOS=0,4;CIEND=0,4;HOMLEN=4;HOMSEQ=GGCC GT:FT:GQ:PL:PR:SR 1/1:PASS:999:999,999,0:0,312:0,185 \ No newline at end of file diff --git a/dockerfiles/gatk_liftover_vcf/scripts/test/files/liftover_vcf_two_samples_in.vcf b/dockerfiles/gatk_liftover_vcf/scripts/test/files/liftover_vcf_two_samples_in.vcf new file mode 100644 index 0000000..570bd53 --- /dev/null +++ b/dockerfiles/gatk_liftover_vcf/scripts/test/files/liftover_vcf_two_samples_in.vcf @@ -0,0 +1,135 @@ +##fileformat=VCFv4.1 +##fileDate=20220720 +##source=GenerateSVCandidates 1.6.0 +##reference=file:///home/ubuntu/ebs/hg19/hg19_broadinstitute/Homo_sapiens_assembly19.fasta +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT=0.999"> +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##ALT= +##ALT= +##ALT= +##ALT= +##cmdline=/miniconda/bin/configManta.py --referenceFasta /home/ubuntu/ebs/hg19/hg19_broadinstitute/Homo_sapiens_assembly19.fasta --bam=/home/ubuntu/ebs_temp/sorted.bam --runDir manta --callRegions /home/ubuntu/ebs/hg19/hg19_broadinstitute/human_hg19.bed.gz +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE1 SAMPLE2 +1 10613 MantaBND:5589:3:5:0:0:0:0 A A]12:95154] 26 PASS SVTYPE=BND;MATEID=MantaBND:5589:3:5:0:0:0:1;IMPRECISE;CIPOS=-181,181;BND_DEPTH=77;MATE_BND_DEPTH=124 GT:FT:GQ:PL:PR 0/1:PASS:26:76,0,186:13,5 0/1:PASS:26:76,0,186:13,5 +1 869444 MantaDEL:11298:0:1:0:0:0 AGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTCCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCTGCTCCGTTACAGGTGGGCGGGGGAGGCTGCTCCGTTACAGGTGGGCGGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTCCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTCCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCG A 322 SampleFT END=870284;SVTYPE=DEL;SVLEN=-840;CIGAR=1M840D;CIPOS=0,27;HOMLEN=27;HOMSEQ=GGGGAGGCGGCTGCGTTACAGGTGGGC GT:FT:GQ:PL:PR:SR 1/1:MinGQ:3:372,4,0:3,14:1,3 1/1:MinGQ:3:372,4,0:3,14:1,3 +1 1530481 MantaDEL:11359:0:0:1:5:0 TGACAGAGAGAGGCAGAGAGAGAGAGAGAGAGACAGACACAGAGAGAGCAGAACAGGGAGAAACAGAGAGACAGAGAGCGAGA T 602 MaxDepth END=1530563;SVTYPE=DEL;SVLEN=-82;CIGAR=1M82D;CIPOS=0,2;HOMLEN=2;HOMSEQ=GA GT:FT:GQ:PL:PR:SR 0/1:PASS:602:652,0,794:27,0:110,24 0/1:PASS:602:652,0,794:27,0:110,24 +1 1598413 MantaDEL:11364:0:0:0:0:0 ACACGCCTGTAATCCCAGCTACTTGGGAGGCTGAGGCAGGAGAATCACTTCAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCAAACCAGAGAAATCCAGCTCTGGGTGACAGAGCAAGACTCTGTTTCGGGAAAAATAAAATACATAGGCAGGGCGCGGTGGCT A 999 PASS END=1598580;SVTYPE=DEL;SVLEN=-167;CIGAR=1M167D;CIPOS=0,18;HOMLEN=18;HOMSEQ=CACGCCTGTAATCCCAGC GT:FT:GQ:PL:PR:SR 1/1:PASS:71:999,74,0:0,28:0,26 1/1:PASS:71:999,74,0:0,28:0,26 +1 1649485 MantaINS:11457:0:0:0:0:0 G GCTCTCATAGCCCTTCTGAACGGTCTGTGACACATGCATGCTTTCAGCTATT 887 MaxDepth END=1649485;SVTYPE=INS;SVLEN=51;CIGAR=1M51I;CIPOS=0,5;HOMLEN=5;HOMSEQ=CTCTC GT:FT:GQ:PL:PR:SR 1/1:PASS:82:940,85,0:1,0:0,34 1/1:PASS:82:940,85,0:1,0:0,34 +1 1649680 MantaDEL:11457:0:1:0:0:1 GCATGCTTTCAGGTGGAGTTTGCTCTCTCTGGTTTTCGGTCTGTGACACACGCATGCTTTCAGCTAGAGTATTCTCTCTATAGCCCCTCTGAACGGTCTGTGACACACGCATGCTTTCAGCTGGAGTTTGCTCTCTCTGGTTTTCGGTCTGTGACACACGCAGGCTTTTAGCTAGAGTTTGCTCTCCATAGCCCTTCTGAATGGTCTGTGACACACGCACGCTTTCAGCTAGAGTTTGCTCTCTCTGGTTTTCGGTCTGTGATACACGCACGCTTTCAGCTAGAGTTTGCTCTCTCTGGTTTTCGGTCTGTGACACACGCACGCTTTCAGCTAGAGTATTCTCTCTATAGCCATTCTGAACGGTCTGTGACGCACGTATGCTTTCAGCTAGAGTATTCTTTTTTTTTTTTTTGAGACGGAGTCTTGCTCTGTCGCCCAGGCTAGAGTGTGCAGTGGTGCGATAGCGGCTCACTGCAAGCTCCGCCTCCCAGGTTCATGCCATTCTCCTGCCTCAGCCTCCAGAGCAGCTGGGACTACAGGTGCCCGCCACCACGCCCGGCTAATTTTTTGTATTTTTAGTAGAGACTGGGTTTCACCGTGTTAGCCAGGATGGTCTTGATCTCCTGACCTTGTGATCCACCCGCCTCAGCCTCCCAAAGTGCTGGGATTACAGGCTTGAGCCACCACGCCCGGCCTTCAGCTAGAGTATTCTCTCTATAGCCCTTCTGAATGGTCTGTGACACACGCATGCTTTCAGCTAGAGTTTGCTCTCTCTATAGCCCTTCTGAATGGCCTGTGACACACGCATGCTTTCAGCTAGTTTGCTCTCTCTGGTTTTCGGTCTGTGACGCACACATGCTTTCAGCTAGAGTTTGCTCTCTATAGCCCCTCTGAATGGTCTGTGACACATGCATGCTTTCAGCTATTCTCTCTATAGCCCTTCTGAACGGTCTGTGACACCAT G 999 PASS END=1650642;SVTYPE=DEL;SVLEN=-962;CIGAR=1M962D GT:FT:GQ:PL:PR:SR 0/1:PASS:999:999,0,999:80,9:106,35 0/1:PASS:999:999,0,999:80,9:106,35 diff --git a/dockerfiles/gatk_liftover_vcf/scripts/test/test_preprocess_liftover.py b/dockerfiles/gatk_liftover_vcf/scripts/test/test_preprocess_liftover.py new file mode 100644 index 0000000..f522ebb --- /dev/null +++ b/dockerfiles/gatk_liftover_vcf/scripts/test/test_preprocess_liftover.py @@ -0,0 +1,94 @@ +################################################################# +# Libraries +################################################################# + +import pytest +import filecmp + + +preprocess = __import__("preprocess_liftover") + + +def test_non_standard_chromosomes(tmp_path): + """ + This test checks if non standard chromosomes are not saved to the output VCF + """ + + # Variables and Run + args = { + "inputfile": "test/files/liftover_vcf_non_standard_chrom_in.vcf", + "outputfile": f"{tmp_path}/output.vcf", + "sample_names": ["SAMPLE1"], + } + + preprocess.main(args) + assert filecmp.cmp(f"{tmp_path}/output.vcf", "test/files/liftover_vcf_correct_out.vcf") == True + +def test_missing_chr(tmp_path): + """ + This test checks if chr prefix is added to a non chr based VCF + """ + + # Variables and Run + args = { + "inputfile": "test/files/liftover_vcf_chr_missing_in.vcf", + "outputfile": f"{tmp_path}/output.vcf", + "sample_names": ["SAMPLE1"], + } + + preprocess.main(args) + assert filecmp.cmp(f"{tmp_path}/output.vcf", "test/files/liftover_vcf_correct_out.vcf") == True + + +def test_wrong_sample_ids(tmp_path): + """ + This test checks if the VCF sample identifiers match the expected sample IDs + """ + # Variables and Run + args = { + "inputfile": "test/files/liftover_vcf_chr_missing_in.vcf", + "outputfile": f"{tmp_path}/output.vcf", + "sample_names": ["SAMPLE1", "SAMPLE2"], + } + + with pytest.raises(ValueError) as exc: + preprocess.main(args) + + + assert "Sample names ['SAMPLE1', 'SAMPLE2'] do not match sample identifies in the VCF ['SAMPLE1']" in str(exc.value) + + + + args = { + "inputfile": "test/files/liftover_vcf_chr_missing_in.vcf", + "outputfile": f"{tmp_path}/output.vcf", + "sample_names": ["SAMPLE2"], + } + + + with pytest.raises(ValueError) as exc: + preprocess.main(args) + + + assert "Sample names ['SAMPLE2'] do not match sample identifies in the VCF ['SAMPLE1']" in str(exc.value) + + + args = { + "inputfile": "test/files/liftover_vcf_two_samples_in.vcf", + "outputfile": f"{tmp_path}/output.vcf", + "sample_names": ["SAMPLE3"], + } + + + with pytest.raises(ValueError) as exc: + preprocess.main(args) + + + assert "Sample names ['SAMPLE3'] do not match sample identifies in the VCF ['SAMPLE1', 'SAMPLE2']" in str(exc.value) + + + + + + + diff --git a/portal_objects/file_reference.json b/portal_objects/file_reference.json index 7949e61..111dc9b 100644 --- a/portal_objects/file_reference.json +++ b/portal_objects/file_reference.json @@ -33,5 +33,17 @@ "extra_files": [], "status": "uploading", "accession": "GAPFIGWSGHNU" - } + }, + { + "uuid": "47e4b517-d81c-4184-a4d2-e69b10197e9d", + "institution": "INSTITUTION_UUID", + "project": "PROJECT_UUID", + "file_format": "chain", + "description": "chain file for coordinates liftover, hg19 to hg38", + "aliases": [ + "cgap:hg19-to-hg38-chain" + ], + "status": "uploading", + "accession": "GAPFIYPTQVC8" +} ] diff --git a/portal_objects/software.json b/portal_objects/software.json index 15f3f4f..d69da63 100644 --- a/portal_objects/software.json +++ b/portal_objects/software.json @@ -45,5 +45,41 @@ "title": "pigz_pigz-2.6", "uuid": "c730b361-1b4d-4d29-8eff-8f7576d776e2", "version": "2.6" + }, + { + "project": "PROJECT_UUID", + "institution": "INSTITUTION_UUID", + "name": "gatk", + "software_type": [ + "variant caller" + ], + "source_url": "https://software.broadinstitute.org/gatk/gatk4", + "title": "gatk_4.2.6.1", + "uuid": "7b86efcc-60c4-442b-b803-eeccc63ff175", + "version": "4.2.6.1" + }, + { + "project": "PROJECT_UUID", + "institution": "INSTITUTION_UUID", + "name": "picard", + "software_type": [ + "filter" + ], + "source_url": "https://broadinstitute.github.io/picard/", + "title": "picard_2.26.11", + "uuid": "0c17c344-5949-4fe9-a494-8283724152c1", + "version": "2.26.11" + }, + { + "project": "PROJECT_UUID", + "institution": "INSTITUTION_UUID", + "name": "granite", + "software_type": [ + "filter" + ], + "source_url": "https://github.com/dbmi-bgm/granite", + "title": "granite_0.2.0", + "uuid": "04e57a90-471b-431d-a5ec-b94b3dacf219", + "commit": "0.2.0" } ] diff --git a/portal_objects/workflows/workflow_liftovervcf.json b/portal_objects/workflows/workflow_liftovervcf.json new file mode 100644 index 0000000..6b14070 --- /dev/null +++ b/portal_objects/workflows/workflow_liftovervcf.json @@ -0,0 +1,152 @@ +{ + "accession": "GAPWFBH24FJK", + "app_name": "workflow_liftovervcf", + "app_version": "VERSION", + "arguments": [ + { + "argument_format": "vcf_gz", + "argument_type": "Input file", + "workflow_argument_name": "input_vcf" + }, + { + "argument_format": "fa", + "argument_type": "Input file", + "workflow_argument_name": "reference" + }, + { + "argument_format": "chain", + "argument_type": "Input file", + "workflow_argument_name": "chain" + }, + { + "argument_type": "parameter", + "workflow_argument_name": "sample_names" + }, + { + "argument_format": "vcf_gz", + "argument_type": "Output processed file", + "workflow_argument_name": "vcf_lifted", + "secondary_file_formats": ["vcf_gz_tbi"] + }, + { + "argument_format": "vcf_gz", + "argument_type": "Output processed file", + "workflow_argument_name": "reject" + } + ], + "project": "PROJECT_UUID", + "category": ["processing"], + "cwl_child_filenames": ["preprocess_liftover.cwl", "gatk_liftover.cwl"], + "cwl_directory_url_v1": "s3://CWLBUCKET/PIPELINE/VERSION", + "cwl_main_filename": "workflow_gatk_liftover.cwl", + "cwl_pointer": "", + "description": "Lifts over a VCF file", + "institution": "INSTITUTION_UUID", + "name": "workflow_liftovervcf_VERSION", + "aliases": ["cgap:workflow_liftovervcf_VERSION"], + "steps": [ + { + "inputs": [ + { + "meta": { + "cardinality": "single", + "file_format": "vcf_gz", + "global": true, + "type": "data file" + }, + "name": "vcf", + "source": [ + { + "name": "input_vcf" + } + ] + }, + { + "meta": { + "cardinality": "single", + "file_format": "fa", + "global": true, + "type": "reference file" + }, + "name": "reference_fasta", + "source": [ + { + "name": "reference" + } + ] + }, + { + "meta": { + "cardinality": "single", + "file_format": "chain", + "global": true, + "type": "reference file" + }, + "name": "chain", + "source": [ + { + "name": "chain" + } + ] + }, + { + "meta": { + "cardinality": "single", + "global": true, + "type": "parameter" + }, + "name": "sample_names", + "source": [ + { + "name": "sample_names" + } + ] + } + ], + "meta": { + "analysis_step_types": [ + "format conversion" + ], + "software_used": [ + "/softwares/7b86efcc-60c4-442b-b803-eeccc63ff175", + "/softwares/04e57a90-471b-431d-a5ec-b94b3dacf219", + "/softwares/0c17c344-5949-4fe9-a494-8283724152c1" + ] + }, + "name": "liftovervcf", + "outputs": [ + { + "meta": { + "cardinality": "single", + "file_format": "vcf_gz", + "global": true, + "type": "data file" + }, + "name": "vcf_lifted", + "target": [ + { + "name": "vcf_lifted" + } + ] + }, + { + "meta": { + "cardinality": "single", + "file_format": "vcf_gz", + "global": true, + "type": "data file" + }, + "name": "reject", + "target": [ + { + "name": "reject" + } + ] + } + ] + } + ], + "title": "Liftover VCF", + "uuid": "cb916470-3ec5-4e33-8dcf-c6e147bf31a6" + } + \ No newline at end of file From b0b7ff3c8cc6b89bb394b01bfaf3c38df1423244 Mon Sep 17 00:00:00 2001 From: Dominika Maziec Date: Tue, 9 Aug 2022 19:27:51 +0000 Subject: [PATCH 04/21] liftover update --- cwl/gatk_liftover.cwl | 2 +- cwl/preprocess_liftover.cwl | 2 +- .../scripts/preprocess_liftover.py | 14 +- .../files/liftover_vcf_chr_missing_in.vcf | 135 ------------ .../files/liftover_vcf_chr_missing_in.vcf.gz | Bin 0 -> 3184 bytes .../liftover_vcf_non_standard_chrom_in.vcf | 194 ------------------ .../liftover_vcf_non_standard_chrom_in.vcf.gz | Bin 0 -> 8441 bytes .../files/liftover_vcf_two_samples_in.vcf | 135 ------------ .../files/liftover_vcf_two_samples_in.vcf.gz | Bin 0 -> 3202 bytes .../scripts/test/test_preprocess_liftover.py | 16 +- .../workflows/workflow_liftovervcf.json | 28 ++- 11 files changed, 36 insertions(+), 490 deletions(-) delete mode 100644 dockerfiles/gatk_liftover_vcf/scripts/test/files/liftover_vcf_chr_missing_in.vcf create mode 100644 dockerfiles/gatk_liftover_vcf/scripts/test/files/liftover_vcf_chr_missing_in.vcf.gz delete mode 100644 dockerfiles/gatk_liftover_vcf/scripts/test/files/liftover_vcf_non_standard_chrom_in.vcf create mode 100644 dockerfiles/gatk_liftover_vcf/scripts/test/files/liftover_vcf_non_standard_chrom_in.vcf.gz delete mode 100644 dockerfiles/gatk_liftover_vcf/scripts/test/files/liftover_vcf_two_samples_in.vcf create mode 100644 dockerfiles/gatk_liftover_vcf/scripts/test/files/liftover_vcf_two_samples_in.vcf.gz diff --git a/cwl/gatk_liftover.cwl b/cwl/gatk_liftover.cwl index 6fbec25..6d9dbad 100644 --- a/cwl/gatk_liftover.cwl +++ b/cwl/gatk_liftover.cwl @@ -9,7 +9,7 @@ requirements: hints: - class: DockerRequirement - dockerPull: picard_liftover:1.0.0 + dockerPull: ACCOUNT/gatk_liftover_vcf:VERSION baseCommand: [gatk, LiftoverVcf] diff --git a/cwl/preprocess_liftover.cwl b/cwl/preprocess_liftover.cwl index 372b95a..9a93e9f 100644 --- a/cwl/preprocess_liftover.cwl +++ b/cwl/preprocess_liftover.cwl @@ -9,7 +9,7 @@ requirements: hints: - class: DockerRequirement - dockerPull: ACCOUNT/gatk_liftover:VERSION + dockerPull: ACCOUNT/gatk_liftover_vcf:VERSION baseCommand: [python3, /usr/local/bin/preprocess_liftover.py] diff --git a/dockerfiles/gatk_liftover_vcf/scripts/preprocess_liftover.py b/dockerfiles/gatk_liftover_vcf/scripts/preprocess_liftover.py index 2d6964e..4214459 100644 --- a/dockerfiles/gatk_liftover_vcf/scripts/preprocess_liftover.py +++ b/dockerfiles/gatk_liftover_vcf/scripts/preprocess_liftover.py @@ -14,9 +14,12 @@ from granite.lib import vcf_parser import argparse -#list of standard chromosomes -std_chromosomes = list(map(str, list(range(1,23)))) + ["X", "Y"] +#Constants +CHR_PREFIX = 'chr' +#list of standard chromosomes +std_chromosomes = [str(chrom) for chrom in list(range(1,23))] + ["X", "Y"] +std_chromosomes += [CHR_PREFIX + chrom for chrom in std_chromosomes] ################################################ # Functions @@ -33,7 +36,7 @@ def main(args): # 1. Check if sample names match genotype IDs sample_names = args['sample_names'] vcf_sample_names = vcf.header.IDs_genotypes - sample_names_err = f"Sample names {sample_names} do not match sample identifies in the VCF {vcf_sample_names}" + sample_names_err = f"Sample names {sample_names} do not match sample identifires in the VCF {vcf_sample_names}" if len(sample_names) != len(vcf_sample_names): raise ValueError(sample_names_err) @@ -47,13 +50,12 @@ def main(args): vcf.write_header(output) for vnt in vcf.parse_variants(): - # 2. Exclude non standard chromosomes if vnt.CHROM in std_chromosomes: # 3. Add 'chr' to CHROM if not present - if vnt.CHROM.startswith("chr") == False: - vnt.CHROM = f"chr{vnt.CHROM}" + if vnt.CHROM.startswith(CHR_PREFIX) == False: + vnt.CHROM = f"{CHR_PREFIX}{vnt.CHROM}" vcf.write_variant(output, vnt) diff --git a/dockerfiles/gatk_liftover_vcf/scripts/test/files/liftover_vcf_chr_missing_in.vcf b/dockerfiles/gatk_liftover_vcf/scripts/test/files/liftover_vcf_chr_missing_in.vcf deleted file mode 100644 index 22bd7be..0000000 --- a/dockerfiles/gatk_liftover_vcf/scripts/test/files/liftover_vcf_chr_missing_in.vcf +++ /dev/null @@ -1,135 +0,0 @@ -##fileformat=VCFv4.1 -##fileDate=20220720 -##source=GenerateSVCandidates 1.6.0 -##reference=file:///home/ubuntu/ebs/hg19/hg19_broadinstitute/Homo_sapiens_assembly19.fasta -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT=0.999"> -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##ALT= -##ALT= -##ALT= -##ALT= -##cmdline=/miniconda/bin/configManta.py --referenceFasta /home/ubuntu/ebs/hg19/hg19_broadinstitute/Homo_sapiens_assembly19.fasta --bam=/home/ubuntu/ebs_temp/sorted.bam --runDir manta --callRegions /home/ubuntu/ebs/hg19/hg19_broadinstitute/human_hg19.bed.gz -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE1 -1 10613 MantaBND:5589:3:5:0:0:0:0 A A]12:95154] 26 PASS SVTYPE=BND;MATEID=MantaBND:5589:3:5:0:0:0:1;IMPRECISE;CIPOS=-181,181;BND_DEPTH=77;MATE_BND_DEPTH=124 GT:FT:GQ:PL:PR 0/1:PASS:26:76,0,186:13,5 -1 869444 MantaDEL:11298:0:1:0:0:0 AGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTCCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCTGCTCCGTTACAGGTGGGCGGGGGAGGCTGCTCCGTTACAGGTGGGCGGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTCCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTCCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCG A 322 SampleFT END=870284;SVTYPE=DEL;SVLEN=-840;CIGAR=1M840D;CIPOS=0,27;HOMLEN=27;HOMSEQ=GGGGAGGCGGCTGCGTTACAGGTGGGC GT:FT:GQ:PL:PR:SR 1/1:MinGQ:3:372,4,0:3,14:1,3 -1 1530481 MantaDEL:11359:0:0:1:5:0 TGACAGAGAGAGGCAGAGAGAGAGAGAGAGAGACAGACACAGAGAGAGCAGAACAGGGAGAAACAGAGAGACAGAGAGCGAGA T 602 MaxDepth END=1530563;SVTYPE=DEL;SVLEN=-82;CIGAR=1M82D;CIPOS=0,2;HOMLEN=2;HOMSEQ=GA GT:FT:GQ:PL:PR:SR 0/1:PASS:602:652,0,794:27,0:110,24 -1 1598413 MantaDEL:11364:0:0:0:0:0 ACACGCCTGTAATCCCAGCTACTTGGGAGGCTGAGGCAGGAGAATCACTTCAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCAAACCAGAGAAATCCAGCTCTGGGTGACAGAGCAAGACTCTGTTTCGGGAAAAATAAAATACATAGGCAGGGCGCGGTGGCT A 999 PASS END=1598580;SVTYPE=DEL;SVLEN=-167;CIGAR=1M167D;CIPOS=0,18;HOMLEN=18;HOMSEQ=CACGCCTGTAATCCCAGC GT:FT:GQ:PL:PR:SR 1/1:PASS:71:999,74,0:0,28:0,26 -1 1649485 MantaINS:11457:0:0:0:0:0 G GCTCTCATAGCCCTTCTGAACGGTCTGTGACACATGCATGCTTTCAGCTATT 887 MaxDepth END=1649485;SVTYPE=INS;SVLEN=51;CIGAR=1M51I;CIPOS=0,5;HOMLEN=5;HOMSEQ=CTCTC GT:FT:GQ:PL:PR:SR 1/1:PASS:82:940,85,0:1,0:0,34 -1 1649680 MantaDEL:11457:0:1:0:0:1 GCATGCTTTCAGGTGGAGTTTGCTCTCTCTGGTTTTCGGTCTGTGACACACGCATGCTTTCAGCTAGAGTATTCTCTCTATAGCCCCTCTGAACGGTCTGTGACACACGCATGCTTTCAGCTGGAGTTTGCTCTCTCTGGTTTTCGGTCTGTGACACACGCAGGCTTTTAGCTAGAGTTTGCTCTCCATAGCCCTTCTGAATGGTCTGTGACACACGCACGCTTTCAGCTAGAGTTTGCTCTCTCTGGTTTTCGGTCTGTGATACACGCACGCTTTCAGCTAGAGTTTGCTCTCTCTGGTTTTCGGTCTGTGACACACGCACGCTTTCAGCTAGAGTATTCTCTCTATAGCCATTCTGAACGGTCTGTGACGCACGTATGCTTTCAGCTAGAGTATTCTTTTTTTTTTTTTTGAGACGGAGTCTTGCTCTGTCGCCCAGGCTAGAGTGTGCAGTGGTGCGATAGCGGCTCACTGCAAGCTCCGCCTCCCAGGTTCATGCCATTCTCCTGCCTCAGCCTCCAGAGCAGCTGGGACTACAGGTGCCCGCCACCACGCCCGGCTAATTTTTTGTATTTTTAGTAGAGACTGGGTTTCACCGTGTTAGCCAGGATGGTCTTGATCTCCTGACCTTGTGATCCACCCGCCTCAGCCTCCCAAAGTGCTGGGATTACAGGCTTGAGCCACCACGCCCGGCCTTCAGCTAGAGTATTCTCTCTATAGCCCTTCTGAATGGTCTGTGACACACGCATGCTTTCAGCTAGAGTTTGCTCTCTCTATAGCCCTTCTGAATGGCCTGTGACACACGCATGCTTTCAGCTAGTTTGCTCTCTCTGGTTTTCGGTCTGTGACGCACACATGCTTTCAGCTAGAGTTTGCTCTCTATAGCCCCTCTGAATGGTCTGTGACACATGCATGCTTTCAGCTATTCTCTCTATAGCCCTTCTGAACGGTCTGTGACACCAT G 999 PASS END=1650642;SVTYPE=DEL;SVLEN=-962;CIGAR=1M962D GT:FT:GQ:PL:PR:SR 0/1:PASS:999:999,0,999:80,9:106,35 diff --git a/dockerfiles/gatk_liftover_vcf/scripts/test/files/liftover_vcf_chr_missing_in.vcf.gz b/dockerfiles/gatk_liftover_vcf/scripts/test/files/liftover_vcf_chr_missing_in.vcf.gz new file mode 100644 index 0000000000000000000000000000000000000000..c9be755323b1ef54b4df96f02bad3fa903339c53 GIT binary patch literal 3184 zcmV-$43G04iwFn<@8V(r18iw#bZ>TLa$k01W?y4ya$jv}b8~5KXJ2V}T@h*NygMG-N^dU%7wtL*Z5VyMy(kuYg%Q(uyP5-CquunQ$7HzVp zJ~dowY8E9wR|V)%Kqs!N+j>nvF9e9|fQN>z)dY;A03oJ9ZL20=mI99E5|dhW4OS^& z6HuX9c1^%81x%Odj%C$BI;DVN>6QUFYXa_3fKZ}?uQdUpl@hvX>V|F9K@cKIXqsh` zx}$_1C3MFmj%(H>j%*TG?>e?qmpIa?*}CDHMx8Bkx5!CG2r0vsrHqq( zm`lkPE!as-QpA;Y*~G3VNRCUFr3|+PmlPr0fil%BCG+K&S|fVmR&=?fMVBK$_l6|1nEmbDXGFn1JB`2Zi>n)XnI^4)Y$;B0VQcF1}GB%5llP;ed)hc36b0vU5LKZV<%f=+K3sCM(8c`kv*#(>pn&~BS z6l|ce#i&&gqi%~fvm$01EqPQCgA8vD7Li@BUD8sGsZkNL+?ITk>2+WPXv78sj4TGj zNK4D4vY6$VE!b*eR?8-pGsLlB&T7V1aRJ&u%LbGQfsWFWy;SlPvWlDx>$x_`1sMN~ z6GCR*gQnS*cAO+(_lf#9osB-aBw)*o+!O=rl`WfJbQ zFpl~^On3W)EyI+lX_g$8*&zwG>T{5U0Y3ksqIGfa|1vLC5}|UDC1F%kiVxIyeWEhT z>I$z%vqE22=$l3v+pwgna2T%h1D6^`s_(Z~pN?Bg`VcZfcFVg+n<#RVVP3hif4VX6Xaak8o!@}~2Ti^3E=W6F!8b-2p? z$yo9X3g7UFl?SS128{N0Q3{tKlZvG2j7C&?*a-PjCw?L~tku zTPwEBiLV;D8Tg~=uomAYE7KwJy+dt}2@8H_QH|Kq{b=UDBJqSG_KZ;BY#lEljUi@P z+@klx$D3DX;I+s((Eb>}X4wZ}q9WN@xCENv^dE2I&5Igjxpi)DA44G06O)V4?JG!G zus;qwn1A2q;(j_?e6^!tl(M9$3V^b&ccDew^=Rxre1*z?NAYK@0P9Rmfs2Y}o}ED? z-GHZu==XdH`c*Z4H=TU=2J1<3R}6v&bw zO7W6gYff?8gb#niQo?pI?8~aDGh6jvpJtvv{WyEL{WQ3Kjk&3)w2OQ7w?nk#>Y=>u zgCs-#fKR^p)psH5s)4#YY_sqpSp60(A+uhJ0{y-ts`5HyTd3km{0z}axnjftV%r1b z0m56yUg+2qDWgSMvY{1leZ>*+!n5x!cb3EpEuc^d~eGo;! zG%x&sB*A@|9`+dQGL;jy=ED#4<6`(JV8D375&)h;YSa-dUWovhRMBqwwFWzUfmr(a z31&BS{!WPh7jM4*S?jv4D_~8p{n4EuW*djAmnw)c=oyZ#)qkkNPxE5GryXwb7zNk{ zF>I1;u-_wPg_!#8a|m@T4R_%-NZ{`m_8x{JoLXR{P7x|JPA;!;NCn zEO&T=@ta$1oCM2LegZY!;qEGwy!(m3S%QxZ>3c!d8Wt8!NrVNWhHJ#8O9crxgZZsi zOpOfgfI9^uU}nh}EY!M29o#IQLy|5;a-QNV!;!)u$+tz6ZC_Lf$)?~7F^WXz@DT5Y zkDM5@jvB@C)3n?NyquS_sK_wxS@e>7);%FIjCgOVpyXX5g^`|NB!Q1qz@l1xxhQo| z6>EhfHz$f(%Be+g;_k&3Mzmw~dlv}gqFsr7c{%pqH$a8zSFLlmO)!b6z7xt-*a6!R zcmfkSB5wOc_lJ0Q$JSNlI8%Ok*~AAt-xMdVCA#t|?QjAsnp#T(tBAp_uJ5ER%j!}v zoYj@dlVqvv)4b;oX2a1CT{4F8Y(eU5>xUrXmG!*gTwoO zWZ(&2xK>o^p~`}C5P&PZ=YvAjghy#)ijuxWx2v%LkbhHLmiDj3x z!u63?;uOS+D`fW6`<6uGOxvUfAUZZ?8W8N@i-jy(hHE&cU>QamVA(KjiRFpH8Q`9Q z86Q0dioiFtEwT(r#1e%JV-S2_aU8prOTtj$HJHl1Hc9ETNv5UCX5q0}I*c&R`RG8S zH?)ppVj6Kjbt8Atax^)Sgo~oR5=BOjAsb+5h1WdxzKFEsuvc>!mkj)TLC%!d#8hqJ zYi&*V62G)nEU87sKUwJ2K%TMXfBlj+D>W(xr%A&X`OO?wzId79G4Za4g`2{B2KKNuHp68X4Z})*$RB(?k$D+XPa -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT=0.999"> -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##ALT= -##ALT= -##ALT= -##ALT= -##cmdline=/miniconda/bin/configManta.py --referenceFasta /home/ubuntu/ebs/hg19/hg19_broadinstitute/Homo_sapiens_assembly19.fasta --bam=/home/ubuntu/ebs_temp/sorted.bam --runDir manta --callRegions /home/ubuntu/ebs/hg19/hg19_broadinstitute/human_hg19.bed.gz -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE1 -1 10613 MantaBND:5589:3:5:0:0:0:0 A A]12:95154] 26 PASS SVTYPE=BND;MATEID=MantaBND:5589:3:5:0:0:0:1;IMPRECISE;CIPOS=-181,181;BND_DEPTH=77;MATE_BND_DEPTH=124 GT:FT:GQ:PL:PR 0/1:PASS:26:76,0,186:13,5 -1 869444 MantaDEL:11298:0:1:0:0:0 AGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTCCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCTGCTCCGTTACAGGTGGGCGGGGGAGGCTGCTCCGTTACAGGTGGGCGGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTCCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTCCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCG A 322 SampleFT END=870284;SVTYPE=DEL;SVLEN=-840;CIGAR=1M840D;CIPOS=0,27;HOMLEN=27;HOMSEQ=GGGGAGGCGGCTGCGTTACAGGTGGGC GT:FT:GQ:PL:PR:SR 1/1:MinGQ:3:372,4,0:3,14:1,3 -1 1530481 MantaDEL:11359:0:0:1:5:0 TGACAGAGAGAGGCAGAGAGAGAGAGAGAGAGACAGACACAGAGAGAGCAGAACAGGGAGAAACAGAGAGACAGAGAGCGAGA T 602 MaxDepth END=1530563;SVTYPE=DEL;SVLEN=-82;CIGAR=1M82D;CIPOS=0,2;HOMLEN=2;HOMSEQ=GA GT:FT:GQ:PL:PR:SR 0/1:PASS:602:652,0,794:27,0:110,24 -1 1598413 MantaDEL:11364:0:0:0:0:0 ACACGCCTGTAATCCCAGCTACTTGGGAGGCTGAGGCAGGAGAATCACTTCAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCAAACCAGAGAAATCCAGCTCTGGGTGACAGAGCAAGACTCTGTTTCGGGAAAAATAAAATACATAGGCAGGGCGCGGTGGCT A 999 PASS END=1598580;SVTYPE=DEL;SVLEN=-167;CIGAR=1M167D;CIPOS=0,18;HOMLEN=18;HOMSEQ=CACGCCTGTAATCCCAGC GT:FT:GQ:PL:PR:SR 1/1:PASS:71:999,74,0:0,28:0,26 -1 1649485 MantaINS:11457:0:0:0:0:0 G GCTCTCATAGCCCTTCTGAACGGTCTGTGACACATGCATGCTTTCAGCTATT 887 MaxDepth END=1649485;SVTYPE=INS;SVLEN=51;CIGAR=1M51I;CIPOS=0,5;HOMLEN=5;HOMSEQ=CTCTC GT:FT:GQ:PL:PR:SR 1/1:PASS:82:940,85,0:1,0:0,34 -1 1649680 MantaDEL:11457:0:1:0:0:1 GCATGCTTTCAGGTGGAGTTTGCTCTCTCTGGTTTTCGGTCTGTGACACACGCATGCTTTCAGCTAGAGTATTCTCTCTATAGCCCCTCTGAACGGTCTGTGACACACGCATGCTTTCAGCTGGAGTTTGCTCTCTCTGGTTTTCGGTCTGTGACACACGCAGGCTTTTAGCTAGAGTTTGCTCTCCATAGCCCTTCTGAATGGTCTGTGACACACGCACGCTTTCAGCTAGAGTTTGCTCTCTCTGGTTTTCGGTCTGTGATACACGCACGCTTTCAGCTAGAGTTTGCTCTCTCTGGTTTTCGGTCTGTGACACACGCACGCTTTCAGCTAGAGTATTCTCTCTATAGCCATTCTGAACGGTCTGTGACGCACGTATGCTTTCAGCTAGAGTATTCTTTTTTTTTTTTTTGAGACGGAGTCTTGCTCTGTCGCCCAGGCTAGAGTGTGCAGTGGTGCGATAGCGGCTCACTGCAAGCTCCGCCTCCCAGGTTCATGCCATTCTCCTGCCTCAGCCTCCAGAGCAGCTGGGACTACAGGTGCCCGCCACCACGCCCGGCTAATTTTTTGTATTTTTAGTAGAGACTGGGTTTCACCGTGTTAGCCAGGATGGTCTTGATCTCCTGACCTTGTGATCCACCCGCCTCAGCCTCCCAAAGTGCTGGGATTACAGGCTTGAGCCACCACGCCCGGCCTTCAGCTAGAGTATTCTCTCTATAGCCCTTCTGAATGGTCTGTGACACACGCATGCTTTCAGCTAGAGTTTGCTCTCTCTATAGCCCTTCTGAATGGCCTGTGACACACGCATGCTTTCAGCTAGTTTGCTCTCTCTGGTTTTCGGTCTGTGACGCACACATGCTTTCAGCTAGAGTTTGCTCTCTATAGCCCCTCTGAATGGTCTGTGACACATGCATGCTTTCAGCTATTCTCTCTATAGCCCTTCTGAACGGTCTGTGACACCAT G 999 PASS END=1650642;SVTYPE=DEL;SVLEN=-962;CIGAR=1M962D GT:FT:GQ:PL:PR:SR 0/1:PASS:999:999,0,999:80,9:106,35 -GL000198.1 76768 MantaDEL:295:3:3:3:3:0 ACCACAAACAAGTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCGGGATCTCGGCTCACTGCAAGCTCCGCCTCCCGGGTTCACGCCATTCTCCTGCCTCAGCCTCCCAAGTAGCTGGGACTACAGGCGCCCGCCACTACGCCCGGCTAATTTTTTGTATTTTTTAGTAGAGACGGGGTTTCACCGTTTTAGCCGGGATGGTCTCGATCTCCTGACCTCGTGATCCGCCCGCCTCAGTCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGC A 299 MaxDepth END=77097;SVTYPE=DEL;SVLEN=-329;CIGAR=1M329D;CIPOS=0,15;HOMLEN=15;HOMSEQ=CCACAAACAAGTTTT GT:FT:GQ:PL:PR:SR 0/1:PASS:162:349,0,159:38,32:17,60 -GL000198.1 80167 MantaBND:561:0:7:0:0:0:1 G ]4:190809293]G 172 PASS SVTYPE=BND;MATEID=MantaBND:561:0:7:0:0:0:0;IMPRECISE;CIPOS=-165,166;BND_DEPTH=203;MATE_BND_DEPTH=42 GT:FT:GQ:PL:PR 0/1:PASS:172:222,0,999:125,30 -GL000198.1 86535 MantaBND:350:1:4:0:0:0:1 G G[4:190815411[ 999 MaxDepth SVTYPE=BND;MATEID=MantaBND:350:1:4:0:0:0:0;CIPOS=0,27;HOMLEN=27;HOMSEQ=TGAGTGGGATTACTGAATCATATGGTA;BND_DEPTH=188;MATE_BND_DEPTH=300 GT:FT:GQ:PL:PR:SR 0/1:PASS:999:999,0,999:55,53:32,22 -GL000208.1 15013 MantaDUP:TANDEM:3:993:1001:1:0:0 T 999 PASS END=29227;SVTYPE=DUP;SVLEN=14214;SVINSLEN=2;SVINSSEQ=AG GT:FT:GQ:PL:PR:SR 0/1:PASS:999:999,0,999:87,36:125,31 -GL000208.1 15267 MantaDEL:3:993:1001:2:1:0 C 999 PASS END=29480;SVTYPE=DEL;SVLEN=-14213;CIPOS=0,5;CIEND=0,5;HOMLEN=5;HOMSEQ=TGTCT GT:FT:GQ:PL:PR:SR 0/1:PASS:999:999,0,999:91,36:92,32 -GL000208.1 81516 MantaDEL:3:3753:3753:0:8:0 TCAAAACTGCTCTATCAAAGGAATGGTTCATCTCTCTGGGTTCAATGCACACATCACAAAGAAGTTTCTGAGAATGCTTCTGGCTAGTTTGTATGTGAAGATATTCCCACTTCCAAAAAAGGCTTCAAGGCGCTCCAAATATTCACCTGCAATTGTACAAAAGTGTGTTTCAAAACTGTTCTGTCAAAAGGAAGGTTCAACTCTGTGAGTTGAATGCACACTTCACAGAGATGTTTCTGAGAATGCTTCTTTCTAGTTTTTCTGTGAAGATATTTCCTTCTCCACCATAGCCCTCAATGCGCTCCAAATGTCCGCTGGCAGATTCCACAGAAACAGTGTTTCAAAACTGCTCTAACAAAAGAAAGGTCCAACTCCGTGATTTGAATGCACACATCACAAAGCAGTTTCTGTGAATCCTTCTGTCTAGTTTTTATATGAGGAGATTTCCTTTTCTACCACGGGCATCAAAGCGTTCCAAATATCCAATTGTAGATTGTA T 193 PASS END=82013;SVTYPE=DEL;SVLEN=-497;CIGAR=1M497D;CIPOS=0,5;HOMLEN=5;HOMSEQ=CAAAA GT:FT:GQ:PL:PR:SR 0/1:PASS:193:243,0,999:70,2:107,13 -GL000228.1 4871 MantaDEL:501:0:0:0:0:1 CAACCTTGTGACTTTGCATGGTTCCGCCCCCATAGCTGCTCTCATGGGCTGGCATTGTGTACCTCTGGCTGGTATTGTGTGTCTCTGGCTTTTCCAGGTGCAAGGTGCAAGCTGTAGCTAGATCTACCATTATGCGGT CTAC 319 MaxDepth;MaxMQ0Frac END=5008;SVTYPE=DEL;SVLEN=-137;CIGAR=1M3I137D GT:FT:GQ:PL:PR:SR 0/1:PASS:319:369,0,477:0,2:37,14 -GL000214.1 52763 MantaINS:1061:0:0:1:0:0 C CTTTCAATGCTGCTGCAAAGGCTTCCTTATATTCTTCTAACTCAGTTGTAACCT 430 PASS END=52763;SVTYPE=INS;SVLEN=53;CIGAR=1M53I;CIPOS=0,53;HOMLEN=53;HOMSEQ=TTTCAATGCTGCTGCAAAGGCTTCCTTATATTCTTCTAACTCAGTTGTAACCT GT:FT:GQ:PL:PR:SR 0/1:PASS:173:480,0,170:0,0:16,11 -GL000214.1 84837 MantaDEL:1010:0:1:0:0:0 GCTTTCCTCTAGGTAAAGATCAGAACTCCAACTAGCACTTAACTCACTGGAAATATCTTAAGAGTCTCAAAATTCACTGCTTTGAATCCCTGACAAGTATAAAAATTTTATACTGAAAACTTCATGCTATTCAAAACATTAAAACAGAAACATCTGACTTAAAGCTTACATTTTTAAAATCTTTTTTATGCTTCTAAATTTGTTTTTATTCAAATATGGATACCAACAATAACATTTATGTCAATGCCTTCTGTTCAATATTGAACAAATAGAATTAGGAATAAGAATAATATGAGTACATCCAATCATTGAATGTACTTTATTTCCAGTATTACATCAAATGTACCTGCTCTCAATGTCTGTACTTTCTTTCTTTGTACTGCTCCTTTCACAGCAGGATCTTCCACTTCAGTGCTAGGCTGAATGGGTTTTAAAAGAAAACGATTCATAAATCATATATATTTTATACAACATGGAGTTAGTGATTCAAAAATATACATAATTAATTATCTTCAAGGAAGGATGTTTTGCAGGAGGCCCTACAAAGCAAAGGGGATATGTCATCAATTATATGTAAGTATGACAGGACCAACCAAACATTCATGCAGTGTTACTGTCGAGCTGAATTCTCAGGCCTGGCTATAAAAATATTTACTTAAGGTTTTGAGGGTTCTTCTTGGCTTCGTCTTTTCATTGCCTAGGACAGCAACATGACAGAAACACAATGAGGAAAATAGGAATATAGGATTCCCAAAATGCACAGTTTACATTTCAGTAGTGAGATTATGTTTCAAATGCCTATACTTAAAATAGAAAAGCATTGATATAACCGTGAACACGTGGACTAATGAGGAGAAAAGGGACCATTAAACAGAGGGGCAAATCAAACCTGAGAGAATCAATGTCAAAGCTGATGGTGAATGTACAGAGTATTTTAA G 999 PASS END=85774;SVTYPE=DEL;SVLEN=-937;CIGAR=1M937D;CIPOS=0,2;HOMLEN=2;HOMSEQ=CT GT:FT:GQ:PL:PR:SR 0/1:PASS:999:999,0,999:152,5:159,60 -GL000218.1 51028 MantaDEL:176149:0:0:0:0:0 ATTTATGCTAATTAAACCAGAAAGTTTCTGTAGGTAATTCAGCTCTGCGCACCATTTAATAGGGTTATGCCAAACAGAACCAATGGTTATATAATACCCAGAATATAAACCTCAGCAAATTCATACTTTTGGAATGGCAACACAAGTGAGAATGAGAGGCAATATTTCATTTCAAAGTACCATAGGCTGCTAAGAGTGTTCTTTTCTTTTAAAAATTTGTGTGGTTGTTAG A 285 MaxMQ0Frac END=51258;SVTYPE=DEL;SVLEN=-230;CIGAR=1M230D;CIPOS=0,1;HOMLEN=1;HOMSEQ=T GT:FT:GQ:PL:PR:SR 0/1:PASS:285:335,0,999:97,3:144,16 -GL000220.1 45590 MantaDEL:927:0:0:0:0:0 TTTGACTTGTTTTATTGTAAAACAAGGCTAATATACAGTATGTGAAAGTTTCCATAAAATTC T 999 PASS END=45651;SVTYPE=DEL;SVLEN=-61;CIGAR=1M61D;CIPOS=0,2;HOMLEN=2;HOMSEQ=TT GT:FT:GQ:PL:PR:SR 0/1:PASS:999:999,0,999:64,0:181,82 -GL000211.1 45888 MantaDEL:117396:0:0:0:0:0 CGAGGTGCACCCTGTGTTCACACCAGGGACGCCAGTGTCCCCAGGGCCCAGCACAGGGGCTCATCGGAAGGCACTTTCTTCCGTGGGGGACCCAGGCCCCGCTTCTAGGCGGAGCGGTTTTTAATTTTTTTCTCTGCCCCAGGTGTCTCACCTTCCCGTCATGGGCCTTCTGCCCGCCTTGGGGTACCCCTAGCAAGCT C 105 PASS END=46086;SVTYPE=DEL;SVLEN=-198;CIGAR=1M198D;CIPOS=0,4;HOMLEN=4;HOMSEQ=GAGG GT:FT:GQ:PL:PR:SR 0/1:PASS:105:155,0,886:32,9:65,7 -GL000199.1 32551 MantaDEL:3:2276:8026:0:0:0 C 311 PASS END=34266;SVTYPE=DEL;SVLEN=-1715;IMPRECISE;CIPOS=-87,88;CIEND=-84,84 GT:FT:GQ:PL:PR 0/1:PASS:311:361,0,341:26,20 -GL000199.1 62034 MantaDEL:109965:1:1:0:4:0 GAGAAGTTTCTGAGAATGCTTCTGTCTTGATTTTATATGAAGATATTCCCGTTTCCAACGAGACCTTCAAAGCTATCCAAATATCCACTTGCAGATTCTACAAAAAGAGTGTTTCCAAAATGTTGTATCAAAACAAAGGTTCAACTCTGTTAGTTGAGGACACACATCGCAAATAAGTTTCTGAGAATGCTTCTGTCTAGTTTTTATTTGAAGATATTTCCTTTCTTACCATAGGCCTGAAAGCGCTTGAAATGTCCGTTTGCAGATACTACAGAAAGAGTGTTTCAAACATGCTCTATGAAAGGGAATGTTCAGTTCTGTGACGTGAATGCAAACATCACAAAGAAGTTCCTGAGAATGCTTCTCTCTAGATTTTATATGTAATCCCGTTTCCAACGAAATCCTCAAAGCTATCCAAACATCCACTTTCAGATTCCACAAAAAGAGTGTTTCAAAACTGTTCTGTAAAAAGAAAGGTTCATCTCTGTTAGTTGAATACACACATCACAA G 156 MaxDepth;MaxMQ0Frac;NoPairSupport END=62543;SVTYPE=DEL;SVLEN=-509;CIGAR=1M509D;CIPOS=0,26;HOMLEN=26;HOMSEQ=AGAAGTTTCTGAGAATGCTTCTGTCT GT:FT:GQ:PL:PR:SR 0/1:PASS:156:206,0,999:3,0:569,26 -GL000199.1 95746 MantaDEL:3:11211:11231:0:0:0 T 92 MaxDepth;MaxMQ0Frac END=96500;SVTYPE=DEL;SVLEN=-754;IMPRECISE;CIPOS=-87,88;CIEND=-78,78 GT:FT:GQ:PL:PR 0/1:PASS:92:142,0,250:18,9 -GL000199.1 140869 MantaDEL:3:11274:11277:0:0:0 T 692 MaxDepth END=142832;SVTYPE=DEL;SVLEN=-1963;IMPRECISE;CIPOS=-86,86;CIEND=-98,98 GT:FT:GQ:PL:PR 0/1:PASS:390:742,0,387:32,40 -GL000199.1 158220 MantaDUP:TANDEM:3:11210:11210:9:0:0 T 351 PASS END=159624;SVTYPE=DUP;SVLEN=1404;IMPRECISE;CIPOS=-131,131;CIEND=-126,127 GT:FT:GQ:PL:PR 0/1:PASS:351:401,0,848:60,37 -GL000217.1 42384 MantaBND:5625:0:1:0:0:0:1 T [21:9551401[T 174 PASS SVTYPE=BND;MATEID=MantaBND:5625:0:1:0:0:0:0;IMPRECISE;CIPOS=-313,313;BND_DEPTH=90;MATE_BND_DEPTH=75 GT:FT:GQ:PL:PR 0/1:PASS:47:224,0,44:5,12 -GL000217.1 109405 MantaDEL:3:2064:2064:0:1:0 AAAAGCCGTGGCAGGGAGGGGCAAAAAGCCGTGGTGGGCAAAAAGCCGCGGCAGCTGGGGGCGAAAAGCGGCGGCGGGTAAAAAGCCGCGGCGGCGGGGGGGGCAAAAAGCCGCGGCGGGCAAGAAGCCGCGGCGGCAAAAAGCCACGGCGCGGGGACGAAA AG 62 PASS END=109566;SVTYPE=DEL;SVLEN=-161;CIGAR=1M1I161D GT:FT:GQ:PL:PR:SR 0/1:PASS:62:112,0,118:17,20:31,7 -GL000217.1 151317 MantaBND:3:17933:17940:0:0:0:0 A [21:9442633[A 17 MinQUAL SVTYPE=BND;MATEID=MantaBND:3:17933:17940:0:0:0:1;IMPRECISE;CIPOS=-117,118;BND_DEPTH=127;MATE_BND_DEPTH=128 GT:FT:GQ:PL:PR 0/1:PASS:17:67,0,611:41,11 -GL000217.1 155442 MantaDEL:3:18182:18182:0:2:5 GCCGCGGCGGCGGAGCCAAAAAGCCTCGGCGGCAAAAACCCGCGGTGGCGGGGGCAAAAAGACGCGGCGGCCAAAAGCCGCGGCGGCGGGGAGTAAAAAGCCGCGTCGGCAAAAGCCGCGGCGAGGGGGGTAAAAAGCCGCGTTGGCAAAAA G 69 PASS END=155593;SVTYPE=DEL;SVLEN=-151;CIGAR=1M151D;CIPOS=0,12;HOMLEN=12;HOMSEQ=CCGCGGCGGCGG GT:FT:GQ:PL:PR:SR 0/1:PASS:55:119,0,53:27,3:11,4 -GL000216.1 116789 MantaINS:1181:0:0:0:8:0 A 133 PASS END=116808;SVTYPE=INS;LEFT_SVINSSEQ=TATATTTATATATACATTTATATATACTTTACTATGATAGACAT;RIGHT_SVINSSEQ=ATATATACACATATATACACACACACACATATACATATACATATATATACAT GT:FT:GQ:PL:PR:SR 0/1:PASS:133:183,0,439:2,0:25,7 -GL000216.1 148942 MantaDEL:1248:0:0:0:1:0 ATATCACCTGGGAGATCAGTGCAAAGGTATATCACAAGGCCCTATGTAGGCAAAGCCTAGACAGTAGTTACATCAGTTGGGTGATCAGTGGCGAGATCTCTCACAATTCCCCTGTAGGCAGAGCTTATATAACAGTTAC AA 305 MaxDepth;MaxMQ0Frac END=149080;SVTYPE=DEL;SVLEN=-138;CIGAR=1M1I138D GT:FT:GQ:PL:PR:SR 0/1:PASS:305:355,0,689:54,6:209,13 -GL000205.1 64758 MantaDEL:412:0:0:0:1:0 CTTTCTTTCTTTCTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTCTTTCTTTCTTTCTTCTTTCTTTCTTTCTTTCT C 999 PASS END=64844;SVTYPE=DEL;SVLEN=-86;CIGAR=1M86D GT:FT:GQ:PL:PR:SR 0/1:PASS:152:999,0,149:30,0:66,24 -GL000205.1 89982 MantaDEL:456:0:0:0:1:0 TGTGCTGGACACCTTGGTGGTTCCACTTGTCCCATTGTGATGCCCATGGCCTCCTGGACTTTAGTACATGTTCTGACGTTGCAAGATTCCGTCGGCACCATGGGAGCCGCTTCCTCTACTGTCATTGAAACACCCCTGGGATGTATATTTAAAAATTGAAACAGCTTTTGGCTAGATGAACCAAAAAAAAAAAAAAGAAAAGAAAAAACTTATCTTCTTTTGTAATACTATTTAGCTTGCATACAGATTAGCTCACAAAACATGGCTGGGGAATGAGACTGTGAACTTTAACACCCTCCTACAGCTAGATCTTTTCTGTAGAAATCAGGGAAAATGGTCTGAAATATCCTATGTGCAAACCTTTATGGCCTGACAACAAAACCCAGCTCTATGCAGCACCTGTGGGCTAAAGCCTAGTAAGCCACAAAGCCCCTCAGAAGCATTAGAAGATCATCTCTTATTAAGGGGAAGGGACCCCAGACCCAACAGCCCAA T 416 PASS END=90475;SVTYPE=DEL;SVLEN=-493;CIGAR=1M493D GT:FT:GQ:PL:PR:SR 0/1:PASS:416:466,0,999:221,17:244,23 -GL000205.1 94604 MantaBND:448:0:1:1:0:0:0 C [GL000195.1:24097[C 24 PASS SVTYPE=BND;MATEID=MantaBND:448:0:1:1:0:0:1;IMPRECISE;CIPOS=-212,213;BND_DEPTH=305;MATE_BND_DEPTH=395 GT:FT:GQ:PL:PR 0/1:PASS:24:74,0,999:253,44 -GL000205.1 103024 MantaBND:429:0:1:1:0:0:1 A [GL000195.1:16560[A 126 PASS SVTYPE=BND;MATEID=MantaBND:429:0:1:1:0:0:0;IMPRECISE;CIPOS=-141,142;BND_DEPTH=205;MATE_BND_DEPTH=289 GT:FT:GQ:PL:PR 0/1:PASS:126:176,0,999:296,56 -GL000205.1 110471 MantaINS:436:0:0:0:0:0 C CGAAAGTGCCTTCCCATCAGCCCCAGCGCATGGCCCCGGGACCCTGACATCTCTGGTTTGAACCCAGGGTGCGTCTCGGGCCCGATAGGGGTACCCCAAAGCGAGCAGAAGGCCCCTGAGGGGGAAGACTAGGTTTGAGGGAGGGGAGGGGAGGCAATTGTGGCAGGAAAAAAAACAAAAAACAAAAAACACAGCGCCGTCAAGAAGCGGGCCTGTGTCCATCACATAA 999 PASS END=110471;SVTYPE=INS;SVLEN=228;CIGAR=1M228I;CIPOS=0,23;HOMLEN=23;HOMSEQ=GAAAGTGCCTTCCCATCAGCCCC GT:FT:GQ:PL:PR:SR 0/1:PASS:679:999,0,676:68,3:14,179 -GL000205.1 142318 MantaDEL:453:0:0:0:0:0 CTCCCTCACATAGGATTCCAAAAGACTGCTACGAGGTTCTGAATTATTCTCCATCACATGGGATTCCAGAACACCCCTGCTGTCTTCTGAATGTTTCTCCCTCACATAGTGTTCCATAACACTACTGCTGGGTTCTGAGAGTTTT C 317 PASS END=142462;SVTYPE=DEL;SVLEN=-144;CIGAR=1M144D;CIPOS=0,19;HOMLEN=19;HOMSEQ=TCCCTCACATAGGATTCCA GT:FT:GQ:PL:PR:SR 0/1:PASS:317:367,0,999:74,32:88,24 -GL000205.1 144054 MantaDUP:TANDEM:3:91:91:2:2:0 G 146 PASS END=144150;SVTYPE=DUP;SVLEN=96;CIPOS=0,11;CIEND=0,11;HOMLEN=11;HOMSEQ=GGATTCCAGAA GT:FT:GQ:PL:PR:SR 0/1:PASS:146:196,0,999:93,0:180,16 -GL000205.1 148170 MantaBND:3:113:150:0:1:0:0 C [1:142653895[GCAC 324 MaxDepth;NoPairSupport SVTYPE=BND;MATEID=MantaBND:3:113:150:0:1:0:1;SVINSLEN=3;SVINSSEQ=GCA;BND_DEPTH=188;MATE_BND_DEPTH=283 GT:FT:GQ:PL:PR:SR 0/1:PASS:324:374,0,999:176,0:208,27 -GL000205.1 149555 MantaDEL:464:0:1:0:0:0 T 449 PASS END=157324;SVTYPE=DEL;SVLEN=-7769;CIPOS=0,6;CIEND=0,6;HOMLEN=6;HOMSEQ=ATAGGA GT:FT:GQ:PL:PR:SR 0/1:PASS:449:499,0,999:135,18:123,18 -GL000205.1 153589 MantaDEL:3:55:152:0:0:0 A 283 PASS END=154961;SVTYPE=DEL;SVLEN=-1372;IMPRECISE;CIPOS=-203,204;CIEND=-219,220 GT:FT:GQ:PL:PR 0/1:PASS:283:333,0,999:116,38 -GL000219.1 55210 MantaDEL:391:0:0:0:2:0 AACAGGCTCTCATAGTCACCATCATCACAGGAATCTTGCGCCTGTTACAGGGAAGCACTGTCCTACATCGTATGTG A 999 PASS END=55285;SVTYPE=DEL;SVLEN=-75;CIGAR=1M75D;CIPOS=0,26;HOMLEN=26;HOMSEQ=ACAGGCTCTCATAGTCACCATCATCA GT:FT:GQ:PL:PR:SR 1/1:PASS:65:999,68,0:29,0:24,39 -GL000219.1 111132 MantaDEL:226:0:3:0:0:0 T 999 PASS END=114246;SVTYPE=DEL;SVLEN=-3114;CIPOS=0,10;CIEND=0,10;HOMLEN=10;HOMSEQ=TTACCTTTTC GT:FT:GQ:PL:PR:SR 1/1:PASS:176:999,179,0:0,40:0,22 -GL000219.1 120545 MantaDEL:366:0:0:0:1:0 CCCGGCAGGGCCAGCGCAGGGGTTCTGCAGGGCCGTCCTGTGGCTCTGATATCTATGAATGGGCGGCCAGCAAAGGCACAGGCACCAGCAGAAAGGAGCAGAGACGTCCAGAACATGCTGTCCCCCAATGCCAGCCCCAAGCGAGGCCAGCTTAGGGTGCGGTGGGCAGAGCCATCCTGCAGCTCTGAGATCCACGAGGTGGAGGTGGAAGTAGTGCCAGGCTCCACTGCATTGGAGCTCGCACAATTTGAGCCCAATTTGATTCCGGAGCCATGACCTGT CTGC 749 MaxDepth END=120825;SVTYPE=DEL;SVLEN=-280;CIGAR=1M3I280D GT:FT:GQ:PL:PR:SR 0/1:PASS:408:799,0,404:174,3:394,19 -GL000219.1 121166 MantaBND:310:1:2:0:0:0:0 T T]20:29595124] 675 MaxDepth SVTYPE=BND;MATEID=MantaBND:310:1:2:0:0:0:1;CIPOS=0,1;HOMLEN=1;HOMSEQ=G;BND_DEPTH=349;MATE_BND_DEPTH=242 GT:FT:GQ:PL:PR:SR 0/1:PASS:675:725,0,999:306,4:249,37 -GL000219.1 122613 MantaBND:317:0:1:0:0:0:0 A A]20:29594562] 246 MaxDepth SVTYPE=BND;MATEID=MantaBND:317:0:1:0:0:0:1;IMPRECISE;CIPOS=-200,200;BND_DEPTH=352;MATE_BND_DEPTH=129 GT:FT:GQ:PL:PR 0/1:PASS:246:296,0,999:86,25 -GL000219.1 123128 MantaBND:326:0:1:0:0:0:1 T [4:190835845[T 83 PASS SVTYPE=BND;MATEID=MantaBND:326:0:1:0:0:0:0;IMPRECISE;CIPOS=-132,133;BND_DEPTH=112;MATE_BND_DEPTH=49 GT:FT:GQ:PL:PR 0/1:PASS:83:133,0,999:132,31 -GL000219.1 159372 MantaBND:322:0:1:0:0:0:0 T T]GL000198.1:69848] 999 MaxDepth SVTYPE=BND;MATEID=MantaBND:322:0:1:0:0:0:1;BND_DEPTH=166;MATE_BND_DEPTH=341 GT:FT:GQ:PL:PR:SR 0/1:PASS:999:999,0,999:61,14:66,48 -GL000224.1 42781 MantaDEL:3:43:47:0:0:0 C 607 PASS END=157865;SVTYPE=DEL;SVLEN=-115084;IMPRECISE;CIPOS=-186,187;CIEND=-234,235 GT:FT:GQ:PL:PR 0/1:PASS:607:657,0,999:82,46 -GL000224.1 73529 MantaDEL:39:0:0:0:0:0 ATTCCCCACCGCAGAAAAGCCTTCTACTACTGACTGGACTGGAGAATACATGCCTCAGTTCCGTTGTTCCTTGGCTGTGTAACTCTGAGGTCCAT AA 917 PASS END=73623;SVTYPE=DEL;SVLEN=-94;CIGAR=1M1I94D GT:FT:GQ:PL:PR:SR 0/1:PASS:772:967,0,769:36,10:78,51 -GL000195.1 4607 MantaDEL:110119:0:1:0:0:0 C 999 PASS END=6132;SVTYPE=DEL;SVLEN=-1525;CIPOS=0,2;CIEND=0,2;HOMLEN=2;HOMSEQ=AT GT:FT:GQ:PL:PR:SR 0/1:PASS:999:999,0,999:121,101:114,78 -GL000195.1 16560 MantaBND:429:0:1:1:0:0:0 A [GL000205.1:103024[A 126 PASS SVTYPE=BND;MATEID=MantaBND:429:0:1:1:0:0:1;IMPRECISE;CIPOS=-185,185;BND_DEPTH=289;MATE_BND_DEPTH=205 GT:FT:GQ:PL:PR 0/1:PASS:126:176,0,999:296,56 -GL000195.1 24097 MantaBND:448:0:1:1:0:0:1 T [GL000205.1:94604[T 24 PASS SVTYPE=BND;MATEID=MantaBND:448:0:1:1:0:0:0;IMPRECISE;CIPOS=-203,204;BND_DEPTH=395;MATE_BND_DEPTH=305 GT:FT:GQ:PL:PR 0/1:PASS:24:74,0,999:253,44 -GL000195.1 55076 MantaDEL:7240:11:11:1:0:0 TAAGATAAGACAGTGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGTGGATCATGAGGTCAGGAGATCGAGACCATCCTGGCTAACAAGGTGAAACCCCGTCTCTACTAAAAATACAAAAAATTAGCCGGGTTCGGTGGCGGGCGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATGGCGTGAACCCGGGAAGCGGAGCTTGCCGTGAGCCGAGATTGCGCCACTGCAGTCCGCAGTCCGGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAAAAAAAAAAAAAAAAAA T 125 PASS END=55399;SVTYPE=DEL;SVLEN=-323;CIGAR=1M323D;CIPOS=0,11;HOMLEN=11;HOMSEQ=AAGATAAGACA GT:FT:GQ:PL:PR:SR 0/1:PASS:125:175,0,999:117,9:110,15 -GL000195.1 56177 MantaINS:110198:0:0:0:0:0 T TTCAAATCAAATTTGTTTTTTTGTTTTTTTGTTTTTTTTTTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCTGGACTGCGGACTGCAGTGGCGCAATCTCTGCTCACTGCAAGCTCCGCTTCCCGGGTTCACGCCATTCTCCTGCCTCAGCCTCCCAAGTAGCTGGGACTACAGGCGCCCGCCACCGCGCCCGGCTAATTTTTTGTATTTTTAGTAGAGACGGGGTTTCACCTTGTTAGCCAGGATGGTCTCGATCTCCTGACCTCATGATCCACCCGCCTCGGCCTCCCAAAGTGCTCGGATTACAGGCGTGAGCCACCGCGCCTGGCC 496 PASS END=56177;SVTYPE=INS;SVLEN=330;CIGAR=1M330I;CIPOS=0,18;HOMLEN=18;HOMSEQ=TCAAATCAAATTTGTTTT GT:FT:GQ:PL:PR:SR 0/1:PASS:496:546,0,999:141,23:108,17 -GL000195.1 71292 MantaDEL:110169:0:0:0:0:1 ACGTTTCTTCTGCATAAGCAAAAGGTACAAAAATTAAAATACTTAATGTGATGGTTA ATTAAGCT 999 PASS END=71348;SVTYPE=DEL;SVLEN=-56;CIGAR=1M7I56D GT:FT:GQ:PL:PR:SR 0/1:PASS:999:999,0,999:34,0:115,73 -GL000222.1 60338 MantaDEL:117338:0:0:0:0:0 CCAATAGCACAGTAGTTTATGCAAACACGTAGAGGTAGTGCCTTGGTGGCCTTGGATTAACATCAAGAAAAATTCTCTTGATTTATCAAGTAGAGACTCTTGTTCTCTTCCCTTACTTTCTTTTAAATCA CG 587 MaxDepth END=60467;SVTYPE=DEL;SVLEN=-129;CIGAR=1M1I129D GT:FT:GQ:PL:PR:SR 0/1:PASS:587:637,0,808:62,18:119,18 -GL000222.1 61283 MantaDEL:117344:0:0:0:4:0 GGTACAGGGGTTCTTTAGTCAGCAGGTGATGAATTCTGCCAGGTCTTTACTGACATGGCAGCACTGAGTTTAATGTAAAGTCCTCCAGTCCT GTTCCTCTAATCAC 60 MaxDepth END=61374;SVTYPE=DEL;SVLEN=-91;CIGAR=1M13I91D GT:FT:GQ:PL:PR:SR 0/1:PASS:60:110,0,282:55,0:50,8 -GL000193.1 68524 MantaBND:857:0:1:0:0:0:1 G G]4:190479009] 222 PASS SVTYPE=BND;MATEID=MantaBND:857:0:1:0:0:0:0;IMPRECISE;CIPOS=-150,151;BND_DEPTH=118;MATE_BND_DEPTH=49 GT:FT:GQ:PL:PR 0/1:PASS:222:272,0,999:74,28 -GL000194.1 47237 MantaDEL:1077:0:0:0:0:0 TTGGACTTCATCAAAATTAAACATTTTTGCACTTCAAAGGACACCGTCAAGAAAGTGAAAAGACAACTCACAAGA T 289 MaxMQ0Frac END=47311;SVTYPE=DEL;SVLEN=-74;CIGAR=1M74D;CIPOS=0,4;HOMLEN=4;HOMSEQ=TGGA GT:FT:GQ:PL:PR:SR 0/1:PASS:180:339,0,177:0,0:26,8 -GL000194.1 82300 MantaDEL:1105:0:0:0:0:0 ACAGAACACTGCTGCTGGAGTCTGAATGTTTGTCAGTCACATAGAATTCCAGAACACTGCTACAAGGGTGTCAATATTTCTCCCTCACCTAGTATTC A 416 PASS END=82396;SVTYPE=DEL;SVLEN=-96;CIGAR=1M96D;CIPOS=0,10;HOMLEN=10;HOMSEQ=CAGAACACTG GT:FT:GQ:PL:PR:SR 0/1:PASS:416:466,0,717:13,9:64,15 -GL000225.1 4921 MantaDEL:3:19821:19821:3:0:0 TGATCAGTGCAGAGATATGTCACAATGTCCCTGTAGGCAGAGCATAGAGAAGTGTTGTATCACCTGAGC T 309 PASS END=4989;SVTYPE=DEL;SVLEN=-68;CIGAR=1M68D;CIPOS=0,26;HOMLEN=26;HOMSEQ=GATCAGTGCAGAGATATGTCACAATG GT:FT:GQ:PL:PR:SR 0/1:PASS:309:359,0,415:22,0:40,11 -GL000225.1 24224 MantaDEL:533:0:0:0:0:0 CCTGATCACCCAGGAGATGTAAATCTTGGCTAGGCTCGGCCTACAGGGGCTTTGTGACATATTTCTCCA C 999 PASS END=24292;SVTYPE=DEL;SVLEN=-68;CIGAR=1M68D;CIPOS=0,10;HOMLEN=10;HOMSEQ=CTGATCACCC GT:FT:GQ:PL:PR:SR 0/1:PASS:999:999,0,999:36,0:132,31 -GL000225.1 71951 MantaDEL:3:19861:19861:0:1:0 ACCACGCATCCTAAAACTCCTCCTGCAGAGGCCGGTATTCTTCCTCCCTGATTGGTGATTCCAGCGAGGTGGCCTCTTCCAAGGCCTCCAG A 316 PASS END=72041;SVTYPE=DEL;SVLEN=-90;CIGAR=1M90D;CIPOS=0,1;HOMLEN=1;HOMSEQ=C GT:FT:GQ:PL:PR:SR 0/1:PASS:316:366,0,906:48,0:136,13 -GL000225.1 76478 MantaDEL:3:19878:19878:0:3:0 GATCCCCCAGGTGATGCAACTATTCTCTATGGTCTGCCTACAGGGGAAATTGTGACATAACTCTGCACTT G 100 PASS END=76547;SVTYPE=DEL;SVLEN=-69;CIGAR=1M69D;CIPOS=0,3;HOMLEN=3;HOMSEQ=ATC GT:FT:GQ:PL:PR:SR 0/1:PASS:100:150,0,999:43,0:105,9 -GL000225.1 143516 MantaDEL:1227:0:0:0:1:0 GAGCATTTTGACATATCTCTGCACTGATCACTGAGATGATGTAACTCTTCTCTGGGCTTTGCCTACAGGA G 208 PASS END=143585;SVTYPE=DEL;SVLEN=-69;CIGAR=1M69D GT:FT:GQ:PL:PR:SR 0/1:PASS:208:258,0,999:27,0:118,12 -NC_007605 139719 MantaDEL:110055:0:1:0:0:0 G 999 PASS END=151550;SVTYPE=DEL;SVLEN=-11831;CIPOS=0,4;CIEND=0,4;HOMLEN=4;HOMSEQ=GGCC GT:FT:GQ:PL:PR:SR 1/1:PASS:999:999,999,0:0,312:0,185 \ No newline at end of file diff --git a/dockerfiles/gatk_liftover_vcf/scripts/test/files/liftover_vcf_non_standard_chrom_in.vcf.gz b/dockerfiles/gatk_liftover_vcf/scripts/test/files/liftover_vcf_non_standard_chrom_in.vcf.gz new file mode 100644 index 0000000000000000000000000000000000000000..63c9e172ea63a2d1cbe4675c8e56a2636fb49ad8 GIT binary patch literal 8441 zcmVF!h18iw#bZ>TLa$k01W?yb^ZeMeBVQyq$a%5j)XmW3DUukYG zc4KA$%v)`5+t!l)%=`-B7TBhMV)ML`lfP9#W+0r1`LMEYsw0&di*dXI@Bu_ucJmUfwQO57W)z`|RYC-V-mrqmxI|O?fCo zDZ@yHaBjVPT3wfidATT8aOvuOHeKAzZs2e|5_@(JX;$TJxhfYxh8!mQ`}_CHhjRbv z*$J3)jF`k!;yUqQf)QL2qmExy1fouqrND>vP4uPr&L=uCBT7?|~ z-4F<2w2W+rz|;hBm{`oFB_34*j@(P7L_;MbaQ0p*5&Bd%?w3n>D}qN#-A`paDh;0B zZwOB=WyO|9FqMvcD!mnlj>=!rLL~K3iNxm>iKy!!PA=tB>104IZ--P2Fx5}x+bcF< zuk<2m@k+!1uh^qXqJe$X+NVlnzZ0q!uS5;Fs`Okn6ybn>X`jk$0imO6@v^3WA4AU) zT7YT#bqsx~L=DJg+f)Nq6@EC10lBp26Iv-fAeXPBGIBsJUwUPe0ekr{VdAX}=BRwR zOfn!>=X)sh0;GX~0Q3z48D+qGrKc%) zmDnoQ9XC$}DF5`9LC&)eVHnx4sU#u#82?VsPu>#E>Dk5QadvujJU)MV_*kwE!*TKX zu{=DPPw&P@<@$Oxd)&;Hi^G4MK0L0<>)E;-tv9Qu>&??@Iv;(Su4Yrb{*M;dtM|n} zFKU*8Szc{cvqgs~UKlNJw^%MVU50Ot&ntd=%0 z-^+3_x;#D^L5ns@-xQg;ddDyK(i z?+4`eaaB%#D;FKZ9=|_6FP`Ce%&7c?qC#)IyaqRhJ=-h?_`ZI7_Dm0a!#)SxKTaW7 z9;dU_2#vj&U4xmS`(MtNcb`Yoja!}8dtAasthbiDI)3*Iqg<8$exl9%`)gjkKRv(t zVnt_*b-C(80bus!DhzOYb9_>K_yU&yy;%N^2ynX@t-%&8&g7lbg?9q3J}rK8x1e6_ z#V=3u*I%*vYIb-3Etdb!^P>+(#}~!x=b+PNIlTcGdW5}!4WBR31vabcVvUEiTEB~9 z5PtX@E3L|hCB$WWsU4yEUq7E`#p&Dg5AS|X-#llzQ%2gW_oIJ3Ev|_@!0Y35wL$xV zJt_D{FK6Z6XgYd$nr~*WrZ@kZUV~?St^oRnF{ASBtegXiua=)+chaswB;H=jJ`=Bw$$;~We;!?Z9SeSeW&U44(K0ibF_8v(8a zIl@t_NB7fDh-m9Ih|SvqySFATThhOKjvH0tkKR2&y4rm1a=ECTG2g-E94Ex{*`MXj zcyw{UDsNt`A<)pYv){`3?0&ff)1$n1Zn-t&83>l!*`lfq%F+AmWVC*Ss_J&uzaJNu z&q3m=$LV4LmL}i_Nmdw_>!(ND?An!A<;a-v?EY|6p;`ovcGhfbbKDUK90X|2^Z$|$y zI{Nvdiud)HR%~_w!e9wOGM_#^qRA?<(aYawfMe^~!)!iX!OuU-AD}40ss&2wHHr#} zlW1;ckGT4%`~$XbFF|F)`Uf`E zE!(PHUeUDt6ya9-D<>|vzY;sekEnotZJc}8C#Eo?m&|+w5ip;DonQ$k5v{$&`}OkSvb=3G$C~ob&v(lw>~E@$ z>l#&g7I)~ts-!lMz*=Mw)!j&%mzypX6lYy#ekEBm`}ty0q~}M+XPsUYz^Z&09X)~E z9vWXi+{^*95BDEtiy1({&2<0cY_WfhJ=EP9xaV~5@$=}_tJ(}F_Ayb>o1Vy{t52?fLHdQh26XR^Tl`B z>&v%i0W_Pz=}`b3bO3e?FbD#6kpL@{fITy~O3yCd9E%qs5TO++Ap1Z~IWZ-OltGm;gX8m~!#E0MtPdO}K=>Wd81V2_tiuDEHyw&II6iU! z5RPSZz=N7YK7wBTaL1}0B$!-X1_Howme%Lp&kR3;5Ktxqs~1Hb zGi)1DxybNJ2C8RSy;HbB1(G0XMpi(QG-JCES7e$Y%gW?H07rPP0EWm5{}ud^!MF44 z3|A;n4sbeXEjaVARg>7nVUJaXjcT*Pabs5zSBB*ywCuL_1CBH6n7)R)7VBi=( zELz#>M8}4$45bZNS)0hSavqQcNHbWbK+VBKpc}5uUWP_Qh(ZhFCKN>w$5F?Zc%e#b zSjuT_M6I(Cr?twa(%94rqZqq%j3LqMa2y+SBhpi8r=pER-;r2FPOl>HrN=Exacfy? z+WUfc+EX~|D2%tHg}cF7D(kaU=f>x*mD4Rny;gNgXIF}?8?%m(JJu?`{Fb3rI;Kds zorZ2I`fb?yM=(WZ%A6o@l#{<#H{~vd@n=peQp!jXh|`p_2kr*lgR2#lOc*|RCFSc5 zDJk63tU~aNZbwj{b!pku30Lp|N=||jP~Z}nu=0Yn2WGr6Xk=bA#VsSF6X(pA0ucz8 zB`I7Up?f=0WV@Vi&>^}O_jY&d)@;YEJKZABDp=@Ws~sW5Utmd}bS)o*8vv%-7;8;v zwd}#z#5So5j*oVwCtyja8IJKNh947XIL6AnXkMELA{*Jb0T-DVOzQN7ISO^cj7$3e z^*`U=vn_w83(ngq_Yl>9x99Bb*?_l`NF8s_YHu%wy}fbw+;?}zCzdK_@qKsCoV(|Z zyBCA*p4aZ4S7JaGm9WFDoDxN067}q{l1aVCaO~yLI;)5#t+aP&mp=$ACrYC~Kn|a% zc&y|^L}MHJ{t$glSE}V;Qwl;%q?FFng*0Q;R>~S>8l{xM?5RDa@?Tgh=(C0Rg@Uo(svU~88HGDb z?1G%%c}c-VT|vH%PlK!M0D~5 zmMC(NfxyRawj|a4^(ly|yb6t81?a8S5VdxOQd{~&p!A7^fbxZp@x5kWcoiA<5l&(> zH%4yC?Qy}yd+ss{I!BeB=r_tNlwmmFa}Idw6H-n&S&`St4Lu8;3@eI)a0&@%y_7R@ zmNH`US)xiahGDv&XsJ9QjATIzM)cH4>O{+2GQ)nP7L@oA&=%W*;$5J znw_;jw z_7=RsG|S4_+L_x-ZoRl|<*p-jnvhYr9V7>+L?gn2lNfa_1Zwo3qa^2@rwz!+G;BB0 zw)Pw{c*=vcYAfky;o1Y2W%8nOE80EEmyB#KY4IXXDCZnx4(hzgsVH?(KU;9uQ|BYL znFC8VBBh*^Rztypho))eA?)eg?Nt6%HRH&m+mps7iu&fvi3hE4yla~JB{daPeq#fl zN!_2&3+sUqq4fGy5m})Vf2>+XYU(V(`&8w1Mjh9k)2y3I4f0TS0Uoe8XrK_}vmBfm zGh{z<{iwquj<~{=aG7(CyefR$wAYWbu#)`%W`Z8Ma$VwXqmZkXF1X>s_Bk5A*O z%Ev!u2=EU8e20V-nOINbEO(Vn#seva-;5t;(6oXoBA$`(s3hZu9W)rh0SQ^Bf^!0a zoP(Wljp|{}s5E=Ya+jRkz=O*zw*psRRM6p#RmsK)$_+P&Ke;;RV~iE3X+&r|)7vn% zeU(-cHxB`lxG}!2jIW!AEBVeA5!M3$KyQm8Ef(~sb`pou zR@Qe7q7tH!U1@21Wsii}9!@}S8pp0#d==|)Jp6DRR)NA%jB#`iEvg;M(3jTAQ2Ecv z!z8hwA{9p(i=%qpOTM zeO$~VabZS%9UXCBS5%_9k)AIqCwkW_=Pu!;98h`H zwPktL*QMQFIctl`HE3g1_aMr%D7S!Zi)NKY3$Oi5J7PASa_Y68RZCx6l%HADVnV_d z#`bkK2d($P$c3#my039GVKX~~V-Ndm4Su!?z+rdW8J5OQWM~~KLSH#C&?`yndwOCb z-5Pxe0ceSzs_F!aYIGw9n>mP%uy2qP8T}Vw8pY!%9t_h2I$({{LW%Rcc%1m63k|@Q zbYzb-eMTMGZPI<@tYxhF=Jq0iR&HBnI|dMSGAAI&u*@n6ClLv);t0V|x1dFs7_x8A zgbxiLy3eF<$ft~GJ>0cY_f7f29+L|t#_(M!AOP^dNW&@^G0`ESBlL(93z|ePn!n=< zv{bS7C(qI}jpIas+`mXMNdWMH$uC79BK?fXbBjsO=vfIhhHq~Kn}pp7Y-9#?(h;N_ zjgtaBF`&J#5a7EGO)EQvR`UnR6mWr03EjQk;A-~zbkRDI<@FOj%=LUacmFH2adiHR zG@T3l7JyG#5u}-W^iQ=34wj(Qw9<2sFfskP)2q3}Dg3r`!eS-5B#se8jL#rs2)T3I zatC3+wY(=0;L1^==u>;;K0AemjIPu#X@Ga|3#mKj_C37%-jxH%KtO*yFMD2=8xC3# zO>6{OTEIRnyg>;oXh7^1(8MB^?lYXo3AmlVnQ5NY8&&ulmu2_oXA@y&eI_UgJ^M;-YB}J^6MykAa#=U{!7uzI1~a zH?D_s7A-4m6es>mA#sHOG5O4oaQ3G_C~p!A_r%TU5T|Xezoj~K^A0!p$oylTB&5`( zW06!GwEq(7u)Zntj;gO}NK({S>1byLM%RsTqN`OhfiRj#(*zG$K*x#iTSDr%@^fmE zQ#b9;-D^X6H6AS7wX~YaaMPSQx8v8YsDRDcHWYfy@ z_U+e?KLr)aRMG~Q+|te$x@9fAPE~s&RVy+h7D?I=(3CdJH07Lz?E!W4Fw;u}?Ssq2 z^Ax(D>vm%N+5JJ)DmyI-|G>radN~9DAB8xEoQ0Xo@_*iz&+{ zRWyo;NjEXqi9e)PV@z!bB}Zh_S-I;XvHYkt4pRa2!Ay_H5xgW<^B*t?B~0OQ3Z+g* zkVs-CQ-vNR_Np*XO}7svDW^lA0E1U{tWw!+tG{2)AJV4 z-#9O=tTN-oOdiLBpkJWk@p_)MMENOlz|)R7yX-CDFHPod73o}u7< zm+DND>WuAV*mBukL3FJ2WQbu8iP%^;X?eO|P*&+Nsge7j-LH$oN9mGn)f-&cl{SCYy$42m^(B|rF2W7@S)1Pm8VAIzXtw6`|(t3YZu0;IqlW6*3O~N zrPkK;woa>Y^}`)_1q2R`bgoA_TcWX5XYFM=Nk=u6TN-}(QPlpWkv9q6rtE)VYT7J? zIYDkSOZ8rUP1n}lKV-Xq2tMH57-f=uy^9c}ZHv+r(*X0qZH)80GZZhw`KZPDMD#c} zRXfp|6-J9O%<%F!`BQ7(9Lx*%=#}Zl#PXN)9AhRX(FqzIP#`d$2@#o zsbTmohm~rgZ8VEqsVtP|u97?kDlcT3K4!*bcWbNL1gfepTStsfMA|$)VsRL7u9cY~ zK7@^;V%WbJDoVs5Bu0RZBQl#U8Te4(DO{3>dUWqwfDcY9ToZkZBhmlBmwp`Go3B53 ziqa6DVUihNAfV)NO*DMj0gfAzFYKKtopTNb#+7*#{-(zd)#Hb1@k4d>>kt#u+xX$+ z&;%f{P*;MZ77Q~@0CuArnXWL_6YPLxb&FxZS;M~kDcwA7Hz_Q33&$@#QKeK{0B;1= z&qN;(EI_qJO-GW77%yD>^XwTeY&PIRRG>y}=VG@%o8V+9;&DG~YX!ZEelM=(BJO2U zeoyi}bLGn};QE&8ANh!rQT>KLw?%jJ1;c74v)ZaAbs5)w!|&cfxJrAZ5aN`By9m1u zjvD}kdG8rGqhW~q$QJLWA%ttYejD7lK6`E4?=8e$tiJ~N=|cKNay~~K?(!qdKa9UO z6y*14swQn!_;PT<@<1;0G|kj%iz$BKg{jk4rU}Kc`3+@8_`_+A7h2UC3Qmhs;~c$t zl8B!XIgEsGxHx5@l=t(@tgem-8W07Ki8g0%Gh_*?xIW_>@{R&dE%73pZuAckdx5mwx}bYkRud*b0pv^m`TP^j}y6 zu=ql@#nqgLkN4Z0iD^=)okihbR;YuD6U0bi6$0>?&}k*bz`oEawk&P2*8W_-!iPa# zst7lkFBirEN7$kxs<;_!D3q2W0r(L2fMEJVm>}s~wL3V;{xX1M6^?!ODzB#-DgO5XLod+nmw30-xQcrxLg?9>@%J1N&&zui2ZcPs zJaol{)$>;j}Zp8eTm zFFg1J3->AwQrFeRraFpqXko4`%5w{|^hMPc=0g-M@>Y&5%#80DQnA;czp;T6@mpzK z-|XfWOYrjx-S)KgVO?dy$Sbsl??%}??7TpF4vFKYJ6tVkGW9!wl3xbYu z6sj;LY->&d(OZo#7iv_aiFYzAEVCuwH~t0cVdq}{5C{dsR*0eKF)cQw=@Q){r9 zDM4j5?uiIs(9DS_^aQN8gFQ-8KV9OOCh4x!gUaWYEP$tYe6M3;HK5!~lu=u^GL8DK zIK^-Ot~t$_%92x3`>)t~Obp6v%G#S#(vF**N+oeWvWu)4xe9NXT6c*k#cvK4WQ4Ij z#PKbTIsW6?-^`n=u#K`@-6Aix_N|Tmen>k7eT_h1o%B&f@R zAwEav&gbwF;j_f&C33aBkQ{=iupZ(NCKlpkU*}QX?tF$W(WOt(vVXNV7+G;|lOUJ-t=L+I$!;HWA>ZB^nVR#k+P+pU?uKStGoRf>a&oZ4N}RR5II zkrRfKSL|8tF)xRIMjq`!p7kO8nGzax#0O@PkGz5adWt!uB{Nlr00E4aSIax3do<3Y zHO5FI$84`SIM%V+ayFj_K40b9tQyVpIjz7@=%Q6f?|kUzQN3F+AVYZKrL^C%YkPO9 zd!r1aeq#oww6~LJRd_uZ*>HpDA>43nVPbI5eDu=2*tf@V_g%mz=2a>8VL6sNq7CaW zQYESq69Hlv17v^S6G6+XczB9D*P<{`FPGRbxc3KvS8eet(jUiZQ_fH2G>fD|7E|f3 z@HtcCX^Bt=t>Q=SP$|DQmAozpd=wJKxF8A+OQHn{Lw_nQNN_RT79=KlC9ZrUa9Eoh z7&){Da8wsY-xG(Z!7S)92F6f}E^IY4VaDLyD*K0?{KH_gZpyChCI0K|;loe~lkMP0 zoKwD9l6msl*{PFUo7PjKxov>6h3xP)8QzY}V#_Jvn|*O9bzOZ$(9&mA8s$mrmx9SW z^VfDEKsv^!S_AX*u@bsu+y8Z8(>6z)r_7-I;MpY!^(lXN?fudfrjyk}CacuTqWk1r bo>P54r*u~FVNB=-L$2e0w2P}0{cZpNJyTq@ literal 0 HcmV?d00001 diff --git a/dockerfiles/gatk_liftover_vcf/scripts/test/files/liftover_vcf_two_samples_in.vcf b/dockerfiles/gatk_liftover_vcf/scripts/test/files/liftover_vcf_two_samples_in.vcf deleted file mode 100644 index 570bd53..0000000 --- a/dockerfiles/gatk_liftover_vcf/scripts/test/files/liftover_vcf_two_samples_in.vcf +++ /dev/null @@ -1,135 +0,0 @@ -##fileformat=VCFv4.1 -##fileDate=20220720 -##source=GenerateSVCandidates 1.6.0 -##reference=file:///home/ubuntu/ebs/hg19/hg19_broadinstitute/Homo_sapiens_assembly19.fasta -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT=0.999"> -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##ALT= -##ALT= -##ALT= -##ALT= -##cmdline=/miniconda/bin/configManta.py --referenceFasta /home/ubuntu/ebs/hg19/hg19_broadinstitute/Homo_sapiens_assembly19.fasta --bam=/home/ubuntu/ebs_temp/sorted.bam --runDir manta --callRegions /home/ubuntu/ebs/hg19/hg19_broadinstitute/human_hg19.bed.gz -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE1 SAMPLE2 -1 10613 MantaBND:5589:3:5:0:0:0:0 A A]12:95154] 26 PASS SVTYPE=BND;MATEID=MantaBND:5589:3:5:0:0:0:1;IMPRECISE;CIPOS=-181,181;BND_DEPTH=77;MATE_BND_DEPTH=124 GT:FT:GQ:PL:PR 0/1:PASS:26:76,0,186:13,5 0/1:PASS:26:76,0,186:13,5 -1 869444 MantaDEL:11298:0:1:0:0:0 AGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTCCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCTGCTCCGTTACAGGTGGGCGGGGGAGGCTGCTCCGTTACAGGTGGGCGGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTCCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTCCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCG A 322 SampleFT END=870284;SVTYPE=DEL;SVLEN=-840;CIGAR=1M840D;CIPOS=0,27;HOMLEN=27;HOMSEQ=GGGGAGGCGGCTGCGTTACAGGTGGGC GT:FT:GQ:PL:PR:SR 1/1:MinGQ:3:372,4,0:3,14:1,3 1/1:MinGQ:3:372,4,0:3,14:1,3 -1 1530481 MantaDEL:11359:0:0:1:5:0 TGACAGAGAGAGGCAGAGAGAGAGAGAGAGAGACAGACACAGAGAGAGCAGAACAGGGAGAAACAGAGAGACAGAGAGCGAGA T 602 MaxDepth END=1530563;SVTYPE=DEL;SVLEN=-82;CIGAR=1M82D;CIPOS=0,2;HOMLEN=2;HOMSEQ=GA GT:FT:GQ:PL:PR:SR 0/1:PASS:602:652,0,794:27,0:110,24 0/1:PASS:602:652,0,794:27,0:110,24 -1 1598413 MantaDEL:11364:0:0:0:0:0 ACACGCCTGTAATCCCAGCTACTTGGGAGGCTGAGGCAGGAGAATCACTTCAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCAAACCAGAGAAATCCAGCTCTGGGTGACAGAGCAAGACTCTGTTTCGGGAAAAATAAAATACATAGGCAGGGCGCGGTGGCT A 999 PASS END=1598580;SVTYPE=DEL;SVLEN=-167;CIGAR=1M167D;CIPOS=0,18;HOMLEN=18;HOMSEQ=CACGCCTGTAATCCCAGC GT:FT:GQ:PL:PR:SR 1/1:PASS:71:999,74,0:0,28:0,26 1/1:PASS:71:999,74,0:0,28:0,26 -1 1649485 MantaINS:11457:0:0:0:0:0 G GCTCTCATAGCCCTTCTGAACGGTCTGTGACACATGCATGCTTTCAGCTATT 887 MaxDepth END=1649485;SVTYPE=INS;SVLEN=51;CIGAR=1M51I;CIPOS=0,5;HOMLEN=5;HOMSEQ=CTCTC GT:FT:GQ:PL:PR:SR 1/1:PASS:82:940,85,0:1,0:0,34 1/1:PASS:82:940,85,0:1,0:0,34 -1 1649680 MantaDEL:11457:0:1:0:0:1 GCATGCTTTCAGGTGGAGTTTGCTCTCTCTGGTTTTCGGTCTGTGACACACGCATGCTTTCAGCTAGAGTATTCTCTCTATAGCCCCTCTGAACGGTCTGTGACACACGCATGCTTTCAGCTGGAGTTTGCTCTCTCTGGTTTTCGGTCTGTGACACACGCAGGCTTTTAGCTAGAGTTTGCTCTCCATAGCCCTTCTGAATGGTCTGTGACACACGCACGCTTTCAGCTAGAGTTTGCTCTCTCTGGTTTTCGGTCTGTGATACACGCACGCTTTCAGCTAGAGTTTGCTCTCTCTGGTTTTCGGTCTGTGACACACGCACGCTTTCAGCTAGAGTATTCTCTCTATAGCCATTCTGAACGGTCTGTGACGCACGTATGCTTTCAGCTAGAGTATTCTTTTTTTTTTTTTTGAGACGGAGTCTTGCTCTGTCGCCCAGGCTAGAGTGTGCAGTGGTGCGATAGCGGCTCACTGCAAGCTCCGCCTCCCAGGTTCATGCCATTCTCCTGCCTCAGCCTCCAGAGCAGCTGGGACTACAGGTGCCCGCCACCACGCCCGGCTAATTTTTTGTATTTTTAGTAGAGACTGGGTTTCACCGTGTTAGCCAGGATGGTCTTGATCTCCTGACCTTGTGATCCACCCGCCTCAGCCTCCCAAAGTGCTGGGATTACAGGCTTGAGCCACCACGCCCGGCCTTCAGCTAGAGTATTCTCTCTATAGCCCTTCTGAATGGTCTGTGACACACGCATGCTTTCAGCTAGAGTTTGCTCTCTCTATAGCCCTTCTGAATGGCCTGTGACACACGCATGCTTTCAGCTAGTTTGCTCTCTCTGGTTTTCGGTCTGTGACGCACACATGCTTTCAGCTAGAGTTTGCTCTCTATAGCCCCTCTGAATGGTCTGTGACACATGCATGCTTTCAGCTATTCTCTCTATAGCCCTTCTGAACGGTCTGTGACACCAT G 999 PASS END=1650642;SVTYPE=DEL;SVLEN=-962;CIGAR=1M962D GT:FT:GQ:PL:PR:SR 0/1:PASS:999:999,0,999:80,9:106,35 0/1:PASS:999:999,0,999:80,9:106,35 diff --git a/dockerfiles/gatk_liftover_vcf/scripts/test/files/liftover_vcf_two_samples_in.vcf.gz b/dockerfiles/gatk_liftover_vcf/scripts/test/files/liftover_vcf_two_samples_in.vcf.gz new file mode 100644 index 0000000000000000000000000000000000000000..c63e287e86de7f73db7c073636614665cc670c0c GIT binary patch literal 3202 zcmV-|41Mz-iwFoD^5S9u18iw#bZ>TLa$k01W?yu7Z(nm^ZE$R5b6;t0E_P#P0L@u# zbJ|D}{;d28)u!q;slr$@ddJIFaRL+WY=VvAy8W=_l3~Ew1&Nh}vrgTQzwQ}HjD*CJ zY(AV{K-TnhKmBx1cMHFLy9zgK6(`#u>pytcFGiQVdCPAOgN*g5MyX~~4eq7!K3TB- zm_;msrTK#wM9Xjqm#Ip+Ru^Rwwqgm3KnykXE-x>i<1M?~KkcJzf61QG%jY$5`OD)| z5(mpLO0zKAXYBGryp12zU>C9|eGJl+ZJ#!;#O)nt*!{Ae89fYfXS?rG##px?vl25QIn)nr4}# z?kJ%L3EeS?PDB@T3Iwr;qlQD=)B_|&MW6Wy&lOG-jrO*akOtkXqG zLLEX}laeDFC84&X*)}zfV3veJM5yCBbrZDY)6xN^N$VzPNvGiu>K=1NOEyiSYldmn z>Hg0lD@VxBC66j%km1e2BC-p%OIoTiH7a73+mdfGy$*~3jo4s-k;Pyb zX=#~M7PB0)1zSzbYB{8GhB!9NS8)HK`D4wEFDK2iTB)9a61W^y~bA9<7cs59MfpIFk@I{xd9^{+R(fD*RTTSoWE*NWC9jtMG`X4sLuk*lgH_rBySd z>97X6(Z~pN?BX=UzYu-)%nI1*k_$euCECxP!&C*<;$&GhO6)WxxYGL8~lSJi{%p5Wzz+ z*jlnpPJGqK?Z6*RhPC)MS(yxx?=5P3N?7nai)zG<9!69DjKmX)*b731XX|(YX$&#T z;ugIhKHi?0fj1)OK>Jewhh-OpiHc-r;R0xer~h~puV2+5%dK;JyBGqI9+{kv?#>`( z&i>f*VE%oZ^M}cF{?(3#QOc60Dges9-h~!zH=}F+@hepRJBmMJ1z2Tj3S3k)^Xv>F z=?1Lsqu=u_=vUSF{bc;%8>}bc`uTg9|L1i0I2_IV59d^;dloFA4DBE`5b#ZmDUc;W zl;S0~)|}#a2p|83rG#x`IG0sZCywgBK21G;@^Sij_i1o*j=71bwDSk`w|%tW>Y=>u zf+R!!fKPt-)psH5s)4%QZ?fr$*`~uNQxnjftVw*kV z0m55{?NH4jMN<*2X!Bb2Wioxx|GI-lFDyyr0HkWXgTAEyhz=xd7bjSR3i6IjnBQ)~ zJYnMc<2}S)Ocv+V&M}MP3>NE~ug}>$54O7v5FBD#=%_!=2J`uk*czZzWjqj&O3)*6 ziBx?KUa+F2DZmzCf#{`N%Mtp!bJD1QuiovUU1hIzEweHi$0xK*@e|@r_=PPy>g+jT z%Zn5a8d4kn&Nkt590T=;cb%I{q0hi!S%pzC8!+|3yH?X3OjWC}IUck7b1LzC7eo;- z%?m#uN$^~z`yB?mOyz{F`0xY$xEOv47%-l&1c0ZI8g&HoGZ6rjD%wrI)?kNk5L-V# z!|bNc-U;#l;?4IzYhBlM1+2-9Ke`vhY~pbFS_N?pdWNGL^&jf+)2uk}X@^@pL;;RL z42NVB>~=_5A*R0j96}vS!)>?;5_tc@-osFYQwxmLDME$D$z_(|4y~)~PY7Km#q9v^ zHu!Uhb+I;_@=^)uy9Uo7xiQ({^#92bZrKumsEg+W+I1RlSzcKq25{mCNmHL@AQ8@Q2Z2Z`*m%P8QbnIQ{K$GAb|Gw(GbS|Eh2(HF&6i1 z4PK;Qf#rD*`yX+o3y-kf^_Mqqy$|;vZxwjhR3<|Oo}d-rQNcr?h!jz9LQzC!D)YhZ z>}Evrclw4XM6-ylaGqec?3t$H_Vk|F)5NzjPzJve+H*}}8ow&kQg9$p#0LmC_g;x- z!+x6va#enWamAmi`xnF^9r(Q}XW71OUsb2uKB0y(_IuZUZ+zFA-SlSnigrnQXuLTo-#Xn)MZS9oK{}bQ8OWeO#9H3I@lqyA2RYucc z-?2677*~09IEHq`KauK_Teu$Pr9taZ`$~M2(-&9pG~YjAwN@i~^LvFr zG2q=6ijZ#8j?vM2dWRT2($P;YVlgyz&2WfZEp*crbqp(-;*Zg`_#T&6(znNd%N4xf zXz{N>c|U*m@Kx~@OQT9Lk#gP;hiU0e#L)w9RHCh9tt4&Gz)e{w!D7!cDU?dvHG0$r z8VCV3#u*}8AZ>)stO#8JqVXS7(j-BzaB)*_scUU=;N&T9$f}$QL3PlEs*q5=YNh4*|=h`R907cU2wuiw&UrAEczIHmX|znR0z z7cYaHCNBp9S7UxyicRCZ0O0$WTjc#}fL&lP5UmTN!S|pwzrciN4_5|aJ+H*PRt&tN zeRzC3RupcRn;qofd~t!uIZ$3@&;)kNu`j#_Wg;+0vM@Tpka5zvW=}?e0MhWrt%52502<^e9RL6T literal 0 HcmV?d00001 diff --git a/dockerfiles/gatk_liftover_vcf/scripts/test/test_preprocess_liftover.py b/dockerfiles/gatk_liftover_vcf/scripts/test/test_preprocess_liftover.py index f522ebb..93ae61f 100644 --- a/dockerfiles/gatk_liftover_vcf/scripts/test/test_preprocess_liftover.py +++ b/dockerfiles/gatk_liftover_vcf/scripts/test/test_preprocess_liftover.py @@ -16,7 +16,7 @@ def test_non_standard_chromosomes(tmp_path): # Variables and Run args = { - "inputfile": "test/files/liftover_vcf_non_standard_chrom_in.vcf", + "inputfile": "test/files/liftover_vcf_non_standard_chrom_in.vcf.gz", "outputfile": f"{tmp_path}/output.vcf", "sample_names": ["SAMPLE1"], } @@ -31,7 +31,7 @@ def test_missing_chr(tmp_path): # Variables and Run args = { - "inputfile": "test/files/liftover_vcf_chr_missing_in.vcf", + "inputfile": "test/files/liftover_vcf_chr_missing_in.vcf.gz", "outputfile": f"{tmp_path}/output.vcf", "sample_names": ["SAMPLE1"], } @@ -46,7 +46,7 @@ def test_wrong_sample_ids(tmp_path): """ # Variables and Run args = { - "inputfile": "test/files/liftover_vcf_chr_missing_in.vcf", + "inputfile": "test/files/liftover_vcf_chr_missing_in.vcf.gz", "outputfile": f"{tmp_path}/output.vcf", "sample_names": ["SAMPLE1", "SAMPLE2"], } @@ -55,12 +55,12 @@ def test_wrong_sample_ids(tmp_path): preprocess.main(args) - assert "Sample names ['SAMPLE1', 'SAMPLE2'] do not match sample identifies in the VCF ['SAMPLE1']" in str(exc.value) + assert "Sample names ['SAMPLE1', 'SAMPLE2'] do not match sample identifires in the VCF ['SAMPLE1']" in str(exc.value) args = { - "inputfile": "test/files/liftover_vcf_chr_missing_in.vcf", + "inputfile": "test/files/liftover_vcf_chr_missing_in.vcf.gz", "outputfile": f"{tmp_path}/output.vcf", "sample_names": ["SAMPLE2"], } @@ -70,11 +70,11 @@ def test_wrong_sample_ids(tmp_path): preprocess.main(args) - assert "Sample names ['SAMPLE2'] do not match sample identifies in the VCF ['SAMPLE1']" in str(exc.value) + assert "Sample names ['SAMPLE2'] do not match sample identifires in the VCF ['SAMPLE1']" in str(exc.value) args = { - "inputfile": "test/files/liftover_vcf_two_samples_in.vcf", + "inputfile": "test/files/liftover_vcf_two_samples_in.vcf.gz", "outputfile": f"{tmp_path}/output.vcf", "sample_names": ["SAMPLE3"], } @@ -84,7 +84,7 @@ def test_wrong_sample_ids(tmp_path): preprocess.main(args) - assert "Sample names ['SAMPLE3'] do not match sample identifies in the VCF ['SAMPLE1', 'SAMPLE2']" in str(exc.value) + assert "Sample names ['SAMPLE3'] do not match sample identifires in the VCF ['SAMPLE1', 'SAMPLE2']" in str(exc.value) diff --git a/portal_objects/workflows/workflow_liftovervcf.json b/portal_objects/workflows/workflow_liftovervcf.json index 6b14070..cd260fe 100644 --- a/portal_objects/workflows/workflow_liftovervcf.json +++ b/portal_objects/workflows/workflow_liftovervcf.json @@ -26,7 +26,9 @@ "argument_format": "vcf_gz", "argument_type": "Output processed file", "workflow_argument_name": "vcf_lifted", - "secondary_file_formats": ["vcf_gz_tbi"] + "secondary_file_formats": [ + "vcf_gz_tbi" + ] }, { "argument_format": "vcf_gz", @@ -35,15 +37,22 @@ } ], "project": "PROJECT_UUID", - "category": ["processing"], - "cwl_child_filenames": ["preprocess_liftover.cwl", "gatk_liftover.cwl"], + "category": [ + "processing" + ], + "cwl_child_filenames": [ + "preprocess_liftover.cwl", + "gatk_liftover.cwl" + ], "cwl_directory_url_v1": "s3://CWLBUCKET/PIPELINE/VERSION", "cwl_main_filename": "workflow_gatk_liftover.cwl", "cwl_pointer": "", "description": "Lifts over a VCF file", "institution": "INSTITUTION_UUID", "name": "workflow_liftovervcf_VERSION", - "aliases": ["cgap:workflow_liftovervcf_VERSION"], + "aliases": [ + "cgap:workflow_liftovervcf_VERSION" + ], "steps": [ { "inputs": [ @@ -54,7 +63,7 @@ "global": true, "type": "data file" }, - "name": "vcf", + "name": "input_vcf", "source": [ { "name": "input_vcf" @@ -108,9 +117,9 @@ "format conversion" ], "software_used": [ - "/softwares/7b86efcc-60c4-442b-b803-eeccc63ff175", - "/softwares/04e57a90-471b-431d-a5ec-b94b3dacf219", - "/softwares/0c17c344-5949-4fe9-a494-8283724152c1" + "/softwares/7b86efcc-60c4-442b-b803-eeccc63ff175", + "/softwares/04e57a90-471b-431d-a5ec-b94b3dacf219", + "/softwares/0c17c344-5949-4fe9-a494-8283724152c1" ] }, "name": "liftovervcf", @@ -148,5 +157,4 @@ ], "title": "Liftover VCF", "uuid": "cb916470-3ec5-4e33-8dcf-c6e147bf31a6" - } - \ No newline at end of file +} \ No newline at end of file From 102a50f173ad4c15ef0b9d9175d59c12141ed3ab Mon Sep 17 00:00:00 2001 From: Michele Date: Fri, 12 Aug 2022 11:48:50 -0400 Subject: [PATCH 05/21] Update gatk_liftover.cwl --- cwl/gatk_liftover.cwl | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/cwl/gatk_liftover.cwl b/cwl/gatk_liftover.cwl index 6d9dbad..3d65561 100644 --- a/cwl/gatk_liftover.cwl +++ b/cwl/gatk_liftover.cwl @@ -18,40 +18,36 @@ inputs: type: File inputBinding: prefix: -I - doc: expect the path to the input vcf - id: reference_sequence type: File inputBinding: prefix: -R - secondaryFiles: - ^.dict - .fai - doc: the reference sequence for the target genome build + doc: reference sequence for the target genome build - id: reject type: string default: "reject.vcf.gz" inputBinding: prefix: --REJECT - - doc: file to which to write rejected records + doc: file to write variants that failed liftover - id: output_vcf type: string default: "output.vcf.gz" inputBinding: prefix: -O - doc: base name of output vcf file - id: chain type: File inputBinding: prefix: -C - doc: the liftover chain file + doc: liftover chain file outputs: From ad728ae519e210a4e514793b7c92e2b09d8e96af Mon Sep 17 00:00:00 2001 From: Michele Date: Fri, 12 Aug 2022 11:52:57 -0400 Subject: [PATCH 06/21] Update preprocess_liftover.cwl --- cwl/preprocess_liftover.cwl | 6 ------ 1 file changed, 6 deletions(-) diff --git a/cwl/preprocess_liftover.cwl b/cwl/preprocess_liftover.cwl index 9a93e9f..2f725d9 100644 --- a/cwl/preprocess_liftover.cwl +++ b/cwl/preprocess_liftover.cwl @@ -11,7 +11,6 @@ hints: - class: DockerRequirement dockerPull: ACCOUNT/gatk_liftover_vcf:VERSION - baseCommand: [python3, /usr/local/bin/preprocess_liftover.py] inputs: @@ -19,14 +18,12 @@ inputs: type: File inputBinding: prefix: -i - doc: expect the path to the input vcf - id: sample_names type: string[] inputBinding: prefix: -s - doc: list of sample IDs - id: output_vcf @@ -34,10 +31,8 @@ inputs: default: "output.vcf" inputBinding: prefix: -o - doc: base name of output vcf file - outputs: - id: output type: File @@ -45,6 +40,5 @@ outputs: glob: $(inputs.output_vcf) doc: | - run preprocess_liftover.py to validate input VCF file for the liftover step From 34768dbfaac4e36a349f117127f61eebae401351 Mon Sep 17 00:00:00 2001 From: Michele Date: Fri, 12 Aug 2022 11:55:14 -0400 Subject: [PATCH 07/21] Update workflow_gatk_liftover.cwl --- cwl/workflow_gatk_liftover.cwl | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/cwl/workflow_gatk_liftover.cwl b/cwl/workflow_gatk_liftover.cwl index e7cfee6..de3fe69 100644 --- a/cwl/workflow_gatk_liftover.cwl +++ b/cwl/workflow_gatk_liftover.cwl @@ -13,7 +13,7 @@ inputs: - id: chain type: File - doc: the liftover chain file + doc: liftover chain file - id: reference type: File @@ -42,8 +42,7 @@ steps: vcf: source: input_vcf sample_names: - source: sample_names - + source: sample_names out: [output] vcf_liftover: @@ -54,11 +53,9 @@ steps: chain: source: chain reference_sequence: - source: reference - + source: reference out: [output, output_reject] - doc: | run preprocess_liftover | - run gatk_liftover \ No newline at end of file + run gatk_liftover From ec996b482417ab668445844f08bea9b74a2be8e1 Mon Sep 17 00:00:00 2001 From: Michele Date: Fri, 12 Aug 2022 11:55:32 -0400 Subject: [PATCH 08/21] Update gatk_liftover.cwl --- cwl/gatk_liftover.cwl | 1 - 1 file changed, 1 deletion(-) diff --git a/cwl/gatk_liftover.cwl b/cwl/gatk_liftover.cwl index 3d65561..dc33202 100644 --- a/cwl/gatk_liftover.cwl +++ b/cwl/gatk_liftover.cwl @@ -66,6 +66,5 @@ outputs: - .tbi doc: | - run picard liftover vcf From baa96638d45f98019dbfd66a743be19c51b62b1a Mon Sep 17 00:00:00 2001 From: Dominika Maziec Date: Fri, 12 Aug 2022 15:57:11 +0000 Subject: [PATCH 09/21] Dockerfiles: gatk_picard, granite_preprocess_liftover --- dockerfiles/gatk_picard/Dockerfile | 19 +++ .../granite_preprocess_liftover/Dockerfile | 22 +++ .../scripts/preprocess_liftover.py | 78 ++++++++++ .../files/liftover_vcf_chr_missing_in.vcf.gz | Bin 0 -> 3184 bytes .../test/files/liftover_vcf_correct_out.vcf | 135 ++++++++++++++++++ .../liftover_vcf_non_standard_chrom_in.vcf.gz | Bin 0 -> 8441 bytes .../files/liftover_vcf_two_samples_in.vcf.gz | Bin 0 -> 3202 bytes .../scripts/test/test_preprocess_liftover.py | 94 ++++++++++++ 8 files changed, 348 insertions(+) create mode 100644 dockerfiles/gatk_picard/Dockerfile create mode 100644 dockerfiles/granite_preprocess_liftover/Dockerfile create mode 100644 dockerfiles/granite_preprocess_liftover/scripts/preprocess_liftover.py create mode 100644 dockerfiles/granite_preprocess_liftover/scripts/test/files/liftover_vcf_chr_missing_in.vcf.gz create mode 100644 dockerfiles/granite_preprocess_liftover/scripts/test/files/liftover_vcf_correct_out.vcf create mode 100644 dockerfiles/granite_preprocess_liftover/scripts/test/files/liftover_vcf_non_standard_chrom_in.vcf.gz create mode 100644 dockerfiles/granite_preprocess_liftover/scripts/test/files/liftover_vcf_two_samples_in.vcf.gz create mode 100644 dockerfiles/granite_preprocess_liftover/scripts/test/test_preprocess_liftover.py diff --git a/dockerfiles/gatk_picard/Dockerfile b/dockerfiles/gatk_picard/Dockerfile new file mode 100644 index 0000000..389782a --- /dev/null +++ b/dockerfiles/gatk_picard/Dockerfile @@ -0,0 +1,19 @@ +####################################################################### +# Basic image +####################################################################### +FROM cgap/cgap-ubuntu2004-py-38:0.0.1 +LABEL mainainers="Michele Berselli (berselli.michele@gmail.com), Dominika Maziec (dominika.maziec@hms.harvard.edu)" + +####################################################################### +# Setting working env +####################################################################### +WORKDIR /usr/local/bin + +####################################################################### +# Software +####################################################################### +## conda install +RUN conda install -c bioconda -y gatk4==4.2.6.1 picard==2.26.11 && \ + conda clean -a -y -f + +CMD ["bash"] \ No newline at end of file diff --git a/dockerfiles/granite_preprocess_liftover/Dockerfile b/dockerfiles/granite_preprocess_liftover/Dockerfile new file mode 100644 index 0000000..95b65d1 --- /dev/null +++ b/dockerfiles/granite_preprocess_liftover/Dockerfile @@ -0,0 +1,22 @@ +####################################################################### +# Basic image +####################################################################### +FROM cgap/cgap-ubuntu2004-py-38:0.0.1 +LABEL mainainers="Michele Berselli (berselli.michele@gmail.com), Dominika Maziec (dominika.maziec@hms.harvard.edu)" + +####################################################################### +# Setting working env +####################################################################### +WORKDIR /usr/local/bin + +####################################################################### +# Software +####################################################################### + +## granite +RUN pip install granite-suite==0.2.0 + +COPY scripts/preprocess_liftover.py . +RUN chmod +x preprocess_liftover.py + +CMD ["bash"] \ No newline at end of file diff --git a/dockerfiles/granite_preprocess_liftover/scripts/preprocess_liftover.py b/dockerfiles/granite_preprocess_liftover/scripts/preprocess_liftover.py new file mode 100644 index 0000000..4214459 --- /dev/null +++ b/dockerfiles/granite_preprocess_liftover/scripts/preprocess_liftover.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python3 + +################################################################################## +# +# Script to validate input VCF file for the liftover step. +# It runs the following steps: +# 1. Check if sample identifiers in the VCF matches provided sample names +# 2. Exlcude non standard chromosomes i.e GL000225.1 +# 3. If the VCF is not 'chr' based, add the prefix +# +################################################################################## + + +from granite.lib import vcf_parser +import argparse + +#Constants +CHR_PREFIX = 'chr' + +#list of standard chromosomes +std_chromosomes = [str(chrom) for chrom in list(range(1,23))] + ["X", "Y"] +std_chromosomes += [CHR_PREFIX + chrom for chrom in std_chromosomes] + +################################################ +# Functions +################################################ + + +def main(args): + + output_file = args['outputfile'] + + vcf = vcf_parser.Vcf(args['inputfile']) + + + # 1. Check if sample names match genotype IDs + sample_names = args['sample_names'] + vcf_sample_names = vcf.header.IDs_genotypes + sample_names_err = f"Sample names {sample_names} do not match sample identifires in the VCF {vcf_sample_names}" + + if len(sample_names) != len(vcf_sample_names): + raise ValueError(sample_names_err) + else: + for id in vcf_sample_names: + if id not in sample_names: + raise ValueError(sample_names_err) + + with open(output_file, "w") as output: + + vcf.write_header(output) + + for vnt in vcf.parse_variants(): + # 2. Exclude non standard chromosomes + if vnt.CHROM in std_chromosomes: + + # 3. Add 'chr' to CHROM if not present + if vnt.CHROM.startswith(CHR_PREFIX) == False: + vnt.CHROM = f"{CHR_PREFIX}{vnt.CHROM}" + + vcf.write_variant(output, vnt) + + +################################################ +# Main +################################################ + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description='Converts genomic coordinates between two different assemblies using pyliftover.') + + parser.add_argument('-i','--inputfile', help='input VCF file', required=True) + parser.add_argument('-o','--outputfile', help='output VCF file', required=True) + parser.add_argument('-s', '--sample_names', help='list of sample IDs that must be present in the input VCF', nargs='+', required=True) + + + args = vars(parser.parse_args()) + + main(args) \ No newline at end of file diff --git a/dockerfiles/granite_preprocess_liftover/scripts/test/files/liftover_vcf_chr_missing_in.vcf.gz b/dockerfiles/granite_preprocess_liftover/scripts/test/files/liftover_vcf_chr_missing_in.vcf.gz new file mode 100644 index 0000000000000000000000000000000000000000..c9be755323b1ef54b4df96f02bad3fa903339c53 GIT binary patch literal 3184 zcmV-$43G04iwFn<@8V(r18iw#bZ>TLa$k01W?y4ya$jv}b8~5KXJ2V}T@h*NygMG-N^dU%7wtL*Z5VyMy(kuYg%Q(uyP5-CquunQ$7HzVp zJ~dowY8E9wR|V)%Kqs!N+j>nvF9e9|fQN>z)dY;A03oJ9ZL20=mI99E5|dhW4OS^& z6HuX9c1^%81x%Odj%C$BI;DVN>6QUFYXa_3fKZ}?uQdUpl@hvX>V|F9K@cKIXqsh` zx}$_1C3MFmj%(H>j%*TG?>e?qmpIa?*}CDHMx8Bkx5!CG2r0vsrHqq( zm`lkPE!as-QpA;Y*~G3VNRCUFr3|+PmlPr0fil%BCG+K&S|fVmR&=?fMVBK$_l6|1nEmbDXGFn1JB`2Zi>n)XnI^4)Y$;B0VQcF1}GB%5llP;ed)hc36b0vU5LKZV<%f=+K3sCM(8c`kv*#(>pn&~BS z6l|ce#i&&gqi%~fvm$01EqPQCgA8vD7Li@BUD8sGsZkNL+?ITk>2+WPXv78sj4TGj zNK4D4vY6$VE!b*eR?8-pGsLlB&T7V1aRJ&u%LbGQfsWFWy;SlPvWlDx>$x_`1sMN~ z6GCR*gQnS*cAO+(_lf#9osB-aBw)*o+!O=rl`WfJbQ zFpl~^On3W)EyI+lX_g$8*&zwG>T{5U0Y3ksqIGfa|1vLC5}|UDC1F%kiVxIyeWEhT z>I$z%vqE22=$l3v+pwgna2T%h1D6^`s_(Z~pN?Bg`VcZfcFVg+n<#RVVP3hif4VX6Xaak8o!@}~2Ti^3E=W6F!8b-2p? z$yo9X3g7UFl?SS128{N0Q3{tKlZvG2j7C&?*a-PjCw?L~tku zTPwEBiLV;D8Tg~=uomAYE7KwJy+dt}2@8H_QH|Kq{b=UDBJqSG_KZ;BY#lEljUi@P z+@klx$D3DX;I+s((Eb>}X4wZ}q9WN@xCENv^dE2I&5Igjxpi)DA44G06O)V4?JG!G zus;qwn1A2q;(j_?e6^!tl(M9$3V^b&ccDew^=Rxre1*z?NAYK@0P9Rmfs2Y}o}ED? z-GHZu==XdH`c*Z4H=TU=2J1<3R}6v&bw zO7W6gYff?8gb#niQo?pI?8~aDGh6jvpJtvv{WyEL{WQ3Kjk&3)w2OQ7w?nk#>Y=>u zgCs-#fKR^p)psH5s)4#YY_sqpSp60(A+uhJ0{y-ts`5HyTd3km{0z}axnjftV%r1b z0m56yUg+2qDWgSMvY{1leZ>*+!n5x!cb3EpEuc^d~eGo;! zG%x&sB*A@|9`+dQGL;jy=ED#4<6`(JV8D375&)h;YSa-dUWovhRMBqwwFWzUfmr(a z31&BS{!WPh7jM4*S?jv4D_~8p{n4EuW*djAmnw)c=oyZ#)qkkNPxE5GryXwb7zNk{ zF>I1;u-_wPg_!#8a|m@T4R_%-NZ{`m_8x{JoLXR{P7x|JPA;!;NCn zEO&T=@ta$1oCM2LegZY!;qEGwy!(m3S%QxZ>3c!d8Wt8!NrVNWhHJ#8O9crxgZZsi zOpOfgfI9^uU}nh}EY!M29o#IQLy|5;a-QNV!;!)u$+tz6ZC_Lf$)?~7F^WXz@DT5Y zkDM5@jvB@C)3n?NyquS_sK_wxS@e>7);%FIjCgOVpyXX5g^`|NB!Q1qz@l1xxhQo| z6>EhfHz$f(%Be+g;_k&3Mzmw~dlv}gqFsr7c{%pqH$a8zSFLlmO)!b6z7xt-*a6!R zcmfkSB5wOc_lJ0Q$JSNlI8%Ok*~AAt-xMdVCA#t|?QjAsnp#T(tBAp_uJ5ER%j!}v zoYj@dlVqvv)4b;oX2a1CT{4F8Y(eU5>xUrXmG!*gTwoO zWZ(&2xK>o^p~`}C5P&PZ=YvAjghy#)ijuxWx2v%LkbhHLmiDj3x z!u63?;uOS+D`fW6`<6uGOxvUfAUZZ?8W8N@i-jy(hHE&cU>QamVA(KjiRFpH8Q`9Q z86Q0dioiFtEwT(r#1e%JV-S2_aU8prOTtj$HJHl1Hc9ETNv5UCX5q0}I*c&R`RG8S zH?)ppVj6Kjbt8Atax^)Sgo~oR5=BOjAsb+5h1WdxzKFEsuvc>!mkj)TLC%!d#8hqJ zYi&*V62G)nEU87sKUwJ2K%TMXfBlj+D>W(xr%A&X`OO?wzId79G4Za4g`2{B2KKNuHp68X4Z})*$RB(?k$D+XPa +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT=0.999"> +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##ALT= +##ALT= +##ALT= +##ALT= +##cmdline=/miniconda/bin/configManta.py --referenceFasta /home/ubuntu/ebs/hg19/hg19_broadinstitute/Homo_sapiens_assembly19.fasta --bam=/home/ubuntu/ebs_temp/sorted.bam --runDir manta --callRegions /home/ubuntu/ebs/hg19/hg19_broadinstitute/human_hg19.bed.gz +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE1 +chr1 10613 MantaBND:5589:3:5:0:0:0:0 A A]12:95154] 26 PASS SVTYPE=BND;MATEID=MantaBND:5589:3:5:0:0:0:1;IMPRECISE;CIPOS=-181,181;BND_DEPTH=77;MATE_BND_DEPTH=124 GT:FT:GQ:PL:PR 0/1:PASS:26:76,0,186:13,5 +chr1 869444 MantaDEL:11298:0:1:0:0:0 AGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTCCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCTGCTCCGTTACAGGTGGGCGGGGGAGGCTGCTCCGTTACAGGTGGGCGGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTCCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTCCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCG A 322 SampleFT END=870284;SVTYPE=DEL;SVLEN=-840;CIGAR=1M840D;CIPOS=0,27;HOMLEN=27;HOMSEQ=GGGGAGGCGGCTGCGTTACAGGTGGGC GT:FT:GQ:PL:PR:SR 1/1:MinGQ:3:372,4,0:3,14:1,3 +chr1 1530481 MantaDEL:11359:0:0:1:5:0 TGACAGAGAGAGGCAGAGAGAGAGAGAGAGAGACAGACACAGAGAGAGCAGAACAGGGAGAAACAGAGAGACAGAGAGCGAGA T 602 MaxDepth END=1530563;SVTYPE=DEL;SVLEN=-82;CIGAR=1M82D;CIPOS=0,2;HOMLEN=2;HOMSEQ=GA GT:FT:GQ:PL:PR:SR 0/1:PASS:602:652,0,794:27,0:110,24 +chr1 1598413 MantaDEL:11364:0:0:0:0:0 ACACGCCTGTAATCCCAGCTACTTGGGAGGCTGAGGCAGGAGAATCACTTCAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCAAACCAGAGAAATCCAGCTCTGGGTGACAGAGCAAGACTCTGTTTCGGGAAAAATAAAATACATAGGCAGGGCGCGGTGGCT A 999 PASS END=1598580;SVTYPE=DEL;SVLEN=-167;CIGAR=1M167D;CIPOS=0,18;HOMLEN=18;HOMSEQ=CACGCCTGTAATCCCAGC GT:FT:GQ:PL:PR:SR 1/1:PASS:71:999,74,0:0,28:0,26 +chr1 1649485 MantaINS:11457:0:0:0:0:0 G GCTCTCATAGCCCTTCTGAACGGTCTGTGACACATGCATGCTTTCAGCTATT 887 MaxDepth END=1649485;SVTYPE=INS;SVLEN=51;CIGAR=1M51I;CIPOS=0,5;HOMLEN=5;HOMSEQ=CTCTC GT:FT:GQ:PL:PR:SR 1/1:PASS:82:940,85,0:1,0:0,34 +chr1 1649680 MantaDEL:11457:0:1:0:0:1 GCATGCTTTCAGGTGGAGTTTGCTCTCTCTGGTTTTCGGTCTGTGACACACGCATGCTTTCAGCTAGAGTATTCTCTCTATAGCCCCTCTGAACGGTCTGTGACACACGCATGCTTTCAGCTGGAGTTTGCTCTCTCTGGTTTTCGGTCTGTGACACACGCAGGCTTTTAGCTAGAGTTTGCTCTCCATAGCCCTTCTGAATGGTCTGTGACACACGCACGCTTTCAGCTAGAGTTTGCTCTCTCTGGTTTTCGGTCTGTGATACACGCACGCTTTCAGCTAGAGTTTGCTCTCTCTGGTTTTCGGTCTGTGACACACGCACGCTTTCAGCTAGAGTATTCTCTCTATAGCCATTCTGAACGGTCTGTGACGCACGTATGCTTTCAGCTAGAGTATTCTTTTTTTTTTTTTTGAGACGGAGTCTTGCTCTGTCGCCCAGGCTAGAGTGTGCAGTGGTGCGATAGCGGCTCACTGCAAGCTCCGCCTCCCAGGTTCATGCCATTCTCCTGCCTCAGCCTCCAGAGCAGCTGGGACTACAGGTGCCCGCCACCACGCCCGGCTAATTTTTTGTATTTTTAGTAGAGACTGGGTTTCACCGTGTTAGCCAGGATGGTCTTGATCTCCTGACCTTGTGATCCACCCGCCTCAGCCTCCCAAAGTGCTGGGATTACAGGCTTGAGCCACCACGCCCGGCCTTCAGCTAGAGTATTCTCTCTATAGCCCTTCTGAATGGTCTGTGACACACGCATGCTTTCAGCTAGAGTTTGCTCTCTCTATAGCCCTTCTGAATGGCCTGTGACACACGCATGCTTTCAGCTAGTTTGCTCTCTCTGGTTTTCGGTCTGTGACGCACACATGCTTTCAGCTAGAGTTTGCTCTCTATAGCCCCTCTGAATGGTCTGTGACACATGCATGCTTTCAGCTATTCTCTCTATAGCCCTTCTGAACGGTCTGTGACACCAT G 999 PASS END=1650642;SVTYPE=DEL;SVLEN=-962;CIGAR=1M962D GT:FT:GQ:PL:PR:SR 0/1:PASS:999:999,0,999:80,9:106,35 diff --git a/dockerfiles/granite_preprocess_liftover/scripts/test/files/liftover_vcf_non_standard_chrom_in.vcf.gz b/dockerfiles/granite_preprocess_liftover/scripts/test/files/liftover_vcf_non_standard_chrom_in.vcf.gz new file mode 100644 index 0000000000000000000000000000000000000000..63c9e172ea63a2d1cbe4675c8e56a2636fb49ad8 GIT binary patch literal 8441 zcmVF!h18iw#bZ>TLa$k01W?yb^ZeMeBVQyq$a%5j)XmW3DUukYG zc4KA$%v)`5+t!l)%=`-B7TBhMV)ML`lfP9#W+0r1`LMEYsw0&di*dXI@Bu_ucJmUfwQO57W)z`|RYC-V-mrqmxI|O?fCo zDZ@yHaBjVPT3wfidATT8aOvuOHeKAzZs2e|5_@(JX;$TJxhfYxh8!mQ`}_CHhjRbv z*$J3)jF`k!;yUqQf)QL2qmExy1fouqrND>vP4uPr&L=uCBT7?|~ z-4F<2w2W+rz|;hBm{`oFB_34*j@(P7L_;MbaQ0p*5&Bd%?w3n>D}qN#-A`paDh;0B zZwOB=WyO|9FqMvcD!mnlj>=!rLL~K3iNxm>iKy!!PA=tB>104IZ--P2Fx5}x+bcF< zuk<2m@k+!1uh^qXqJe$X+NVlnzZ0q!uS5;Fs`Okn6ybn>X`jk$0imO6@v^3WA4AU) zT7YT#bqsx~L=DJg+f)Nq6@EC10lBp26Iv-fAeXPBGIBsJUwUPe0ekr{VdAX}=BRwR zOfn!>=X)sh0;GX~0Q3z48D+qGrKc%) zmDnoQ9XC$}DF5`9LC&)eVHnx4sU#u#82?VsPu>#E>Dk5QadvujJU)MV_*kwE!*TKX zu{=DPPw&P@<@$Oxd)&;Hi^G4MK0L0<>)E;-tv9Qu>&??@Iv;(Su4Yrb{*M;dtM|n} zFKU*8Szc{cvqgs~UKlNJw^%MVU50Ot&ntd=%0 z-^+3_x;#D^L5ns@-xQg;ddDyK(i z?+4`eaaB%#D;FKZ9=|_6FP`Ce%&7c?qC#)IyaqRhJ=-h?_`ZI7_Dm0a!#)SxKTaW7 z9;dU_2#vj&U4xmS`(MtNcb`Yoja!}8dtAasthbiDI)3*Iqg<8$exl9%`)gjkKRv(t zVnt_*b-C(80bus!DhzOYb9_>K_yU&yy;%N^2ynX@t-%&8&g7lbg?9q3J}rK8x1e6_ z#V=3u*I%*vYIb-3Etdb!^P>+(#}~!x=b+PNIlTcGdW5}!4WBR31vabcVvUEiTEB~9 z5PtX@E3L|hCB$WWsU4yEUq7E`#p&Dg5AS|X-#llzQ%2gW_oIJ3Ev|_@!0Y35wL$xV zJt_D{FK6Z6XgYd$nr~*WrZ@kZUV~?St^oRnF{ASBtegXiua=)+chaswB;H=jJ`=Bw$$;~We;!?Z9SeSeW&U44(K0ibF_8v(8a zIl@t_NB7fDh-m9Ih|SvqySFATThhOKjvH0tkKR2&y4rm1a=ECTG2g-E94Ex{*`MXj zcyw{UDsNt`A<)pYv){`3?0&ff)1$n1Zn-t&83>l!*`lfq%F+AmWVC*Ss_J&uzaJNu z&q3m=$LV4LmL}i_Nmdw_>!(ND?An!A<;a-v?EY|6p;`ovcGhfbbKDUK90X|2^Z$|$y zI{Nvdiud)HR%~_w!e9wOGM_#^qRA?<(aYawfMe^~!)!iX!OuU-AD}40ss&2wHHr#} zlW1;ckGT4%`~$XbFF|F)`Uf`E zE!(PHUeUDt6ya9-D<>|vzY;sekEnotZJc}8C#Eo?m&|+w5ip;DonQ$k5v{$&`}OkSvb=3G$C~ob&v(lw>~E@$ z>l#&g7I)~ts-!lMz*=Mw)!j&%mzypX6lYy#ekEBm`}ty0q~}M+XPsUYz^Z&09X)~E z9vWXi+{^*95BDEtiy1({&2<0cY_WfhJ=EP9xaV~5@$=}_tJ(}F_Ayb>o1Vy{t52?fLHdQh26XR^Tl`B z>&v%i0W_Pz=}`b3bO3e?FbD#6kpL@{fITy~O3yCd9E%qs5TO++Ap1Z~IWZ-OltGm;gX8m~!#E0MtPdO}K=>Wd81V2_tiuDEHyw&II6iU! z5RPSZz=N7YK7wBTaL1}0B$!-X1_Howme%Lp&kR3;5Ktxqs~1Hb zGi)1DxybNJ2C8RSy;HbB1(G0XMpi(QG-JCES7e$Y%gW?H07rPP0EWm5{}ud^!MF44 z3|A;n4sbeXEjaVARg>7nVUJaXjcT*Pabs5zSBB*ywCuL_1CBH6n7)R)7VBi=( zELz#>M8}4$45bZNS)0hSavqQcNHbWbK+VBKpc}5uUWP_Qh(ZhFCKN>w$5F?Zc%e#b zSjuT_M6I(Cr?twa(%94rqZqq%j3LqMa2y+SBhpi8r=pER-;r2FPOl>HrN=Exacfy? z+WUfc+EX~|D2%tHg}cF7D(kaU=f>x*mD4Rny;gNgXIF}?8?%m(JJu?`{Fb3rI;Kds zorZ2I`fb?yM=(WZ%A6o@l#{<#H{~vd@n=peQp!jXh|`p_2kr*lgR2#lOc*|RCFSc5 zDJk63tU~aNZbwj{b!pku30Lp|N=||jP~Z}nu=0Yn2WGr6Xk=bA#VsSF6X(pA0ucz8 zB`I7Up?f=0WV@Vi&>^}O_jY&d)@;YEJKZABDp=@Ws~sW5Utmd}bS)o*8vv%-7;8;v zwd}#z#5So5j*oVwCtyja8IJKNh947XIL6AnXkMELA{*Jb0T-DVOzQN7ISO^cj7$3e z^*`U=vn_w83(ngq_Yl>9x99Bb*?_l`NF8s_YHu%wy}fbw+;?}zCzdK_@qKsCoV(|Z zyBCA*p4aZ4S7JaGm9WFDoDxN067}q{l1aVCaO~yLI;)5#t+aP&mp=$ACrYC~Kn|a% zc&y|^L}MHJ{t$glSE}V;Qwl;%q?FFng*0Q;R>~S>8l{xM?5RDa@?Tgh=(C0Rg@Uo(svU~88HGDb z?1G%%c}c-VT|vH%PlK!M0D~5 zmMC(NfxyRawj|a4^(ly|yb6t81?a8S5VdxOQd{~&p!A7^fbxZp@x5kWcoiA<5l&(> zH%4yC?Qy}yd+ss{I!BeB=r_tNlwmmFa}Idw6H-n&S&`St4Lu8;3@eI)a0&@%y_7R@ zmNH`US)xiahGDv&XsJ9QjATIzM)cH4>O{+2GQ)nP7L@oA&=%W*;$5J znw_;jw z_7=RsG|S4_+L_x-ZoRl|<*p-jnvhYr9V7>+L?gn2lNfa_1Zwo3qa^2@rwz!+G;BB0 zw)Pw{c*=vcYAfky;o1Y2W%8nOE80EEmyB#KY4IXXDCZnx4(hzgsVH?(KU;9uQ|BYL znFC8VBBh*^Rztypho))eA?)eg?Nt6%HRH&m+mps7iu&fvi3hE4yla~JB{daPeq#fl zN!_2&3+sUqq4fGy5m})Vf2>+XYU(V(`&8w1Mjh9k)2y3I4f0TS0Uoe8XrK_}vmBfm zGh{z<{iwquj<~{=aG7(CyefR$wAYWbu#)`%W`Z8Ma$VwXqmZkXF1X>s_Bk5A*O z%Ev!u2=EU8e20V-nOINbEO(Vn#seva-;5t;(6oXoBA$`(s3hZu9W)rh0SQ^Bf^!0a zoP(Wljp|{}s5E=Ya+jRkz=O*zw*psRRM6p#RmsK)$_+P&Ke;;RV~iE3X+&r|)7vn% zeU(-cHxB`lxG}!2jIW!AEBVeA5!M3$KyQm8Ef(~sb`pou zR@Qe7q7tH!U1@21Wsii}9!@}S8pp0#d==|)Jp6DRR)NA%jB#`iEvg;M(3jTAQ2Ecv z!z8hwA{9p(i=%qpOTM zeO$~VabZS%9UXCBS5%_9k)AIqCwkW_=Pu!;98h`H zwPktL*QMQFIctl`HE3g1_aMr%D7S!Zi)NKY3$Oi5J7PASa_Y68RZCx6l%HADVnV_d z#`bkK2d($P$c3#my039GVKX~~V-Ndm4Su!?z+rdW8J5OQWM~~KLSH#C&?`yndwOCb z-5Pxe0ceSzs_F!aYIGw9n>mP%uy2qP8T}Vw8pY!%9t_h2I$({{LW%Rcc%1m63k|@Q zbYzb-eMTMGZPI<@tYxhF=Jq0iR&HBnI|dMSGAAI&u*@n6ClLv);t0V|x1dFs7_x8A zgbxiLy3eF<$ft~GJ>0cY_f7f29+L|t#_(M!AOP^dNW&@^G0`ESBlL(93z|ePn!n=< zv{bS7C(qI}jpIas+`mXMNdWMH$uC79BK?fXbBjsO=vfIhhHq~Kn}pp7Y-9#?(h;N_ zjgtaBF`&J#5a7EGO)EQvR`UnR6mWr03EjQk;A-~zbkRDI<@FOj%=LUacmFH2adiHR zG@T3l7JyG#5u}-W^iQ=34wj(Qw9<2sFfskP)2q3}Dg3r`!eS-5B#se8jL#rs2)T3I zatC3+wY(=0;L1^==u>;;K0AemjIPu#X@Ga|3#mKj_C37%-jxH%KtO*yFMD2=8xC3# zO>6{OTEIRnyg>;oXh7^1(8MB^?lYXo3AmlVnQ5NY8&&ulmu2_oXA@y&eI_UgJ^M;-YB}J^6MykAa#=U{!7uzI1~a zH?D_s7A-4m6es>mA#sHOG5O4oaQ3G_C~p!A_r%TU5T|Xezoj~K^A0!p$oylTB&5`( zW06!GwEq(7u)Zntj;gO}NK({S>1byLM%RsTqN`OhfiRj#(*zG$K*x#iTSDr%@^fmE zQ#b9;-D^X6H6AS7wX~YaaMPSQx8v8YsDRDcHWYfy@ z_U+e?KLr)aRMG~Q+|te$x@9fAPE~s&RVy+h7D?I=(3CdJH07Lz?E!W4Fw;u}?Ssq2 z^Ax(D>vm%N+5JJ)DmyI-|G>radN~9DAB8xEoQ0Xo@_*iz&+{ zRWyo;NjEXqi9e)PV@z!bB}Zh_S-I;XvHYkt4pRa2!Ay_H5xgW<^B*t?B~0OQ3Z+g* zkVs-CQ-vNR_Np*XO}7svDW^lA0E1U{tWw!+tG{2)AJV4 z-#9O=tTN-oOdiLBpkJWk@p_)MMENOlz|)R7yX-CDFHPod73o}u7< zm+DND>WuAV*mBukL3FJ2WQbu8iP%^;X?eO|P*&+Nsge7j-LH$oN9mGn)f-&cl{SCYy$42m^(B|rF2W7@S)1Pm8VAIzXtw6`|(t3YZu0;IqlW6*3O~N zrPkK;woa>Y^}`)_1q2R`bgoA_TcWX5XYFM=Nk=u6TN-}(QPlpWkv9q6rtE)VYT7J? zIYDkSOZ8rUP1n}lKV-Xq2tMH57-f=uy^9c}ZHv+r(*X0qZH)80GZZhw`KZPDMD#c} zRXfp|6-J9O%<%F!`BQ7(9Lx*%=#}Zl#PXN)9AhRX(FqzIP#`d$2@#o zsbTmohm~rgZ8VEqsVtP|u97?kDlcT3K4!*bcWbNL1gfepTStsfMA|$)VsRL7u9cY~ zK7@^;V%WbJDoVs5Bu0RZBQl#U8Te4(DO{3>dUWqwfDcY9ToZkZBhmlBmwp`Go3B53 ziqa6DVUihNAfV)NO*DMj0gfAzFYKKtopTNb#+7*#{-(zd)#Hb1@k4d>>kt#u+xX$+ z&;%f{P*;MZ77Q~@0CuArnXWL_6YPLxb&FxZS;M~kDcwA7Hz_Q33&$@#QKeK{0B;1= z&qN;(EI_qJO-GW77%yD>^XwTeY&PIRRG>y}=VG@%o8V+9;&DG~YX!ZEelM=(BJO2U zeoyi}bLGn};QE&8ANh!rQT>KLw?%jJ1;c74v)ZaAbs5)w!|&cfxJrAZ5aN`By9m1u zjvD}kdG8rGqhW~q$QJLWA%ttYejD7lK6`E4?=8e$tiJ~N=|cKNay~~K?(!qdKa9UO z6y*14swQn!_;PT<@<1;0G|kj%iz$BKg{jk4rU}Kc`3+@8_`_+A7h2UC3Qmhs;~c$t zl8B!XIgEsGxHx5@l=t(@tgem-8W07Ki8g0%Gh_*?xIW_>@{R&dE%73pZuAckdx5mwx}bYkRud*b0pv^m`TP^j}y6 zu=ql@#nqgLkN4Z0iD^=)okihbR;YuD6U0bi6$0>?&}k*bz`oEawk&P2*8W_-!iPa# zst7lkFBirEN7$kxs<;_!D3q2W0r(L2fMEJVm>}s~wL3V;{xX1M6^?!ODzB#-DgO5XLod+nmw30-xQcrxLg?9>@%J1N&&zui2ZcPs zJaol{)$>;j}Zp8eTm zFFg1J3->AwQrFeRraFpqXko4`%5w{|^hMPc=0g-M@>Y&5%#80DQnA;czp;T6@mpzK z-|XfWOYrjx-S)KgVO?dy$Sbsl??%}??7TpF4vFKYJ6tVkGW9!wl3xbYu z6sj;LY->&d(OZo#7iv_aiFYzAEVCuwH~t0cVdq}{5C{dsR*0eKF)cQw=@Q){r9 zDM4j5?uiIs(9DS_^aQN8gFQ-8KV9OOCh4x!gUaWYEP$tYe6M3;HK5!~lu=u^GL8DK zIK^-Ot~t$_%92x3`>)t~Obp6v%G#S#(vF**N+oeWvWu)4xe9NXT6c*k#cvK4WQ4Ij z#PKbTIsW6?-^`n=u#K`@-6Aix_N|Tmen>k7eT_h1o%B&f@R zAwEav&gbwF;j_f&C33aBkQ{=iupZ(NCKlpkU*}QX?tF$W(WOt(vVXNV7+G;|lOUJ-t=L+I$!;HWA>ZB^nVR#k+P+pU?uKStGoRf>a&oZ4N}RR5II zkrRfKSL|8tF)xRIMjq`!p7kO8nGzax#0O@PkGz5adWt!uB{Nlr00E4aSIax3do<3Y zHO5FI$84`SIM%V+ayFj_K40b9tQyVpIjz7@=%Q6f?|kUzQN3F+AVYZKrL^C%YkPO9 zd!r1aeq#oww6~LJRd_uZ*>HpDA>43nVPbI5eDu=2*tf@V_g%mz=2a>8VL6sNq7CaW zQYESq69Hlv17v^S6G6+XczB9D*P<{`FPGRbxc3KvS8eet(jUiZQ_fH2G>fD|7E|f3 z@HtcCX^Bt=t>Q=SP$|DQmAozpd=wJKxF8A+OQHn{Lw_nQNN_RT79=KlC9ZrUa9Eoh z7&){Da8wsY-xG(Z!7S)92F6f}E^IY4VaDLyD*K0?{KH_gZpyChCI0K|;loe~lkMP0 zoKwD9l6msl*{PFUo7PjKxov>6h3xP)8QzY}V#_Jvn|*O9bzOZ$(9&mA8s$mrmx9SW z^VfDEKsv^!S_AX*u@bsu+y8Z8(>6z)r_7-I;MpY!^(lXN?fudfrjyk}CacuTqWk1r bo>P54r*u~FVNB=-L$2e0w2P}0{cZpNJyTq@ literal 0 HcmV?d00001 diff --git a/dockerfiles/granite_preprocess_liftover/scripts/test/files/liftover_vcf_two_samples_in.vcf.gz b/dockerfiles/granite_preprocess_liftover/scripts/test/files/liftover_vcf_two_samples_in.vcf.gz new file mode 100644 index 0000000000000000000000000000000000000000..c63e287e86de7f73db7c073636614665cc670c0c GIT binary patch literal 3202 zcmV-|41Mz-iwFoD^5S9u18iw#bZ>TLa$k01W?yu7Z(nm^ZE$R5b6;t0E_P#P0L@u# zbJ|D}{;d28)u!q;slr$@ddJIFaRL+WY=VvAy8W=_l3~Ew1&Nh}vrgTQzwQ}HjD*CJ zY(AV{K-TnhKmBx1cMHFLy9zgK6(`#u>pytcFGiQVdCPAOgN*g5MyX~~4eq7!K3TB- zm_;msrTK#wM9Xjqm#Ip+Ru^Rwwqgm3KnykXE-x>i<1M?~KkcJzf61QG%jY$5`OD)| z5(mpLO0zKAXYBGryp12zU>C9|eGJl+ZJ#!;#O)nt*!{Ae89fYfXS?rG##px?vl25QIn)nr4}# z?kJ%L3EeS?PDB@T3Iwr;qlQD=)B_|&MW6Wy&lOG-jrO*akOtkXqG zLLEX}laeDFC84&X*)}zfV3veJM5yCBbrZDY)6xN^N$VzPNvGiu>K=1NOEyiSYldmn z>Hg0lD@VxBC66j%km1e2BC-p%OIoTiH7a73+mdfGy$*~3jo4s-k;Pyb zX=#~M7PB0)1zSzbYB{8GhB!9NS8)HK`D4wEFDK2iTB)9a61W^y~bA9<7cs59MfpIFk@I{xd9^{+R(fD*RTTSoWE*NWC9jtMG`X4sLuk*lgH_rBySd z>97X6(Z~pN?BX=UzYu-)%nI1*k_$euCECxP!&C*<;$&GhO6)WxxYGL8~lSJi{%p5Wzz+ z*jlnpPJGqK?Z6*RhPC)MS(yxx?=5P3N?7nai)zG<9!69DjKmX)*b731XX|(YX$&#T z;ugIhKHi?0fj1)OK>Jewhh-OpiHc-r;R0xer~h~puV2+5%dK;JyBGqI9+{kv?#>`( z&i>f*VE%oZ^M}cF{?(3#QOc60Dges9-h~!zH=}F+@hepRJBmMJ1z2Tj3S3k)^Xv>F z=?1Lsqu=u_=vUSF{bc;%8>}bc`uTg9|L1i0I2_IV59d^;dloFA4DBE`5b#ZmDUc;W zl;S0~)|}#a2p|83rG#x`IG0sZCywgBK21G;@^Sij_i1o*j=71bwDSk`w|%tW>Y=>u zf+R!!fKPt-)psH5s)4%QZ?fr$*`~uNQxnjftVw*kV z0m55{?NH4jMN<*2X!Bb2Wioxx|GI-lFDyyr0HkWXgTAEyhz=xd7bjSR3i6IjnBQ)~ zJYnMc<2}S)Ocv+V&M}MP3>NE~ug}>$54O7v5FBD#=%_!=2J`uk*czZzWjqj&O3)*6 ziBx?KUa+F2DZmzCf#{`N%Mtp!bJD1QuiovUU1hIzEweHi$0xK*@e|@r_=PPy>g+jT z%Zn5a8d4kn&Nkt590T=;cb%I{q0hi!S%pzC8!+|3yH?X3OjWC}IUck7b1LzC7eo;- z%?m#uN$^~z`yB?mOyz{F`0xY$xEOv47%-l&1c0ZI8g&HoGZ6rjD%wrI)?kNk5L-V# z!|bNc-U;#l;?4IzYhBlM1+2-9Ke`vhY~pbFS_N?pdWNGL^&jf+)2uk}X@^@pL;;RL z42NVB>~=_5A*R0j96}vS!)>?;5_tc@-osFYQwxmLDME$D$z_(|4y~)~PY7Km#q9v^ zHu!Uhb+I;_@=^)uy9Uo7xiQ({^#92bZrKumsEg+W+I1RlSzcKq25{mCNmHL@AQ8@Q2Z2Z`*m%P8QbnIQ{K$GAb|Gw(GbS|Eh2(HF&6i1 z4PK;Qf#rD*`yX+o3y-kf^_Mqqy$|;vZxwjhR3<|Oo}d-rQNcr?h!jz9LQzC!D)YhZ z>}Evrclw4XM6-ylaGqec?3t$H_Vk|F)5NzjPzJve+H*}}8ow&kQg9$p#0LmC_g;x- z!+x6va#enWamAmi`xnF^9r(Q}XW71OUsb2uKB0y(_IuZUZ+zFA-SlSnigrnQXuLTo-#Xn)MZS9oK{}bQ8OWeO#9H3I@lqyA2RYucc z-?2677*~09IEHq`KauK_Teu$Pr9taZ`$~M2(-&9pG~YjAwN@i~^LvFr zG2q=6ijZ#8j?vM2dWRT2($P;YVlgyz&2WfZEp*crbqp(-;*Zg`_#T&6(znNd%N4xf zXz{N>c|U*m@Kx~@OQT9Lk#gP;hiU0e#L)w9RHCh9tt4&Gz)e{w!D7!cDU?dvHG0$r z8VCV3#u*}8AZ>)stO#8JqVXS7(j-BzaB)*_scUU=;N&T9$f}$QL3PlEs*q5=YNh4*|=h`R907cU2wuiw&UrAEczIHmX|znR0z z7cYaHCNBp9S7UxyicRCZ0O0$WTjc#}fL&lP5UmTN!S|pwzrciN4_5|aJ+H*PRt&tN zeRzC3RupcRn;qofd~t!uIZ$3@&;)kNu`j#_Wg;+0vM@Tpka5zvW=}?e0MhWrt%52502<^e9RL6T literal 0 HcmV?d00001 diff --git a/dockerfiles/granite_preprocess_liftover/scripts/test/test_preprocess_liftover.py b/dockerfiles/granite_preprocess_liftover/scripts/test/test_preprocess_liftover.py new file mode 100644 index 0000000..93ae61f --- /dev/null +++ b/dockerfiles/granite_preprocess_liftover/scripts/test/test_preprocess_liftover.py @@ -0,0 +1,94 @@ +################################################################# +# Libraries +################################################################# + +import pytest +import filecmp + + +preprocess = __import__("preprocess_liftover") + + +def test_non_standard_chromosomes(tmp_path): + """ + This test checks if non standard chromosomes are not saved to the output VCF + """ + + # Variables and Run + args = { + "inputfile": "test/files/liftover_vcf_non_standard_chrom_in.vcf.gz", + "outputfile": f"{tmp_path}/output.vcf", + "sample_names": ["SAMPLE1"], + } + + preprocess.main(args) + assert filecmp.cmp(f"{tmp_path}/output.vcf", "test/files/liftover_vcf_correct_out.vcf") == True + +def test_missing_chr(tmp_path): + """ + This test checks if chr prefix is added to a non chr based VCF + """ + + # Variables and Run + args = { + "inputfile": "test/files/liftover_vcf_chr_missing_in.vcf.gz", + "outputfile": f"{tmp_path}/output.vcf", + "sample_names": ["SAMPLE1"], + } + + preprocess.main(args) + assert filecmp.cmp(f"{tmp_path}/output.vcf", "test/files/liftover_vcf_correct_out.vcf") == True + + +def test_wrong_sample_ids(tmp_path): + """ + This test checks if the VCF sample identifiers match the expected sample IDs + """ + # Variables and Run + args = { + "inputfile": "test/files/liftover_vcf_chr_missing_in.vcf.gz", + "outputfile": f"{tmp_path}/output.vcf", + "sample_names": ["SAMPLE1", "SAMPLE2"], + } + + with pytest.raises(ValueError) as exc: + preprocess.main(args) + + + assert "Sample names ['SAMPLE1', 'SAMPLE2'] do not match sample identifires in the VCF ['SAMPLE1']" in str(exc.value) + + + + args = { + "inputfile": "test/files/liftover_vcf_chr_missing_in.vcf.gz", + "outputfile": f"{tmp_path}/output.vcf", + "sample_names": ["SAMPLE2"], + } + + + with pytest.raises(ValueError) as exc: + preprocess.main(args) + + + assert "Sample names ['SAMPLE2'] do not match sample identifires in the VCF ['SAMPLE1']" in str(exc.value) + + + args = { + "inputfile": "test/files/liftover_vcf_two_samples_in.vcf.gz", + "outputfile": f"{tmp_path}/output.vcf", + "sample_names": ["SAMPLE3"], + } + + + with pytest.raises(ValueError) as exc: + preprocess.main(args) + + + assert "Sample names ['SAMPLE3'] do not match sample identifires in the VCF ['SAMPLE1', 'SAMPLE2']" in str(exc.value) + + + + + + + From faad30c941f565a4a694ff056a38a366d85705c6 Mon Sep 17 00:00:00 2001 From: Michele Date: Fri, 12 Aug 2022 11:57:27 -0400 Subject: [PATCH 10/21] Update gatk_liftover.cwl --- cwl/gatk_liftover.cwl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cwl/gatk_liftover.cwl b/cwl/gatk_liftover.cwl index dc33202..883ca54 100644 --- a/cwl/gatk_liftover.cwl +++ b/cwl/gatk_liftover.cwl @@ -41,7 +41,7 @@ inputs: default: "output.vcf.gz" inputBinding: prefix: -O - doc: base name of output vcf file + doc: name of output vcf file, compressed - id: chain type: File @@ -49,7 +49,6 @@ inputs: prefix: -C doc: liftover chain file - outputs: - id: output type: File From 3bf66fd06100e2d9e118f1ea0d120f63373b36ff Mon Sep 17 00:00:00 2001 From: Michele Date: Fri, 12 Aug 2022 11:58:46 -0400 Subject: [PATCH 11/21] Update gatk_liftover.cwl --- cwl/gatk_liftover.cwl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cwl/gatk_liftover.cwl b/cwl/gatk_liftover.cwl index 883ca54..4d81f55 100644 --- a/cwl/gatk_liftover.cwl +++ b/cwl/gatk_liftover.cwl @@ -65,5 +65,5 @@ outputs: - .tbi doc: | - run picard liftover vcf + run picard LiftoverVcf From bd7251e1d6e20ab3d0ab1e6cac0b725822b286c4 Mon Sep 17 00:00:00 2001 From: Dominika Maziec Date: Fri, 12 Aug 2022 16:02:13 +0000 Subject: [PATCH 12/21] Dockerfile gatk_liftover_vcf deleted --- dockerfiles/gatk_liftover_vcf/Dockerfile | 25 ------------------------ 1 file changed, 25 deletions(-) delete mode 100644 dockerfiles/gatk_liftover_vcf/Dockerfile diff --git a/dockerfiles/gatk_liftover_vcf/Dockerfile b/dockerfiles/gatk_liftover_vcf/Dockerfile deleted file mode 100644 index af3fe3c..0000000 --- a/dockerfiles/gatk_liftover_vcf/Dockerfile +++ /dev/null @@ -1,25 +0,0 @@ -####################################################################### -# Basic image -####################################################################### -FROM cgap/cgap-ubuntu2004-py-38:0.0.1 -LABEL mainainers="Michele Berselli (berselli.michele@gmail.com), Dominika Maziec (dominika.maziec@hms.harvard.edu)" - -####################################################################### -# Setting working env -####################################################################### -WORKDIR /usr/local/bin - -####################################################################### -# Software -####################################################################### -## conda install -RUN conda install -c bioconda -y gatk4==4.2.6.1 picard==2.26.11 && \ - conda clean -a -y -f - -## granite -RUN pip install granite-suite==0.2.0 - -COPY scripts/preprocess_liftover.py . -RUN chmod +x preprocess_liftover.py - -CMD ["bash"] \ No newline at end of file From 76898b05b6563aa1bea33aea685831a01c262bfa Mon Sep 17 00:00:00 2001 From: Dominika Maziec Date: Fri, 12 Aug 2022 16:05:40 +0000 Subject: [PATCH 13/21] Dockerfile gatk_liftover_vcf deleted --- cwl/gatk_liftover.cwl | 2 +- cwl/preprocess_liftover.cwl | 2 +- .../scripts/preprocess_liftover.py | 78 ---------- .../files/liftover_vcf_chr_missing_in.vcf.gz | Bin 3184 -> 0 bytes .../test/files/liftover_vcf_correct_out.vcf | 135 ------------------ .../liftover_vcf_non_standard_chrom_in.vcf.gz | Bin 8441 -> 0 bytes .../files/liftover_vcf_two_samples_in.vcf.gz | Bin 3202 -> 0 bytes .../scripts/test/test_preprocess_liftover.py | 94 ------------ 8 files changed, 2 insertions(+), 309 deletions(-) delete mode 100644 dockerfiles/gatk_liftover_vcf/scripts/preprocess_liftover.py delete mode 100644 dockerfiles/gatk_liftover_vcf/scripts/test/files/liftover_vcf_chr_missing_in.vcf.gz delete mode 100644 dockerfiles/gatk_liftover_vcf/scripts/test/files/liftover_vcf_correct_out.vcf delete mode 100644 dockerfiles/gatk_liftover_vcf/scripts/test/files/liftover_vcf_non_standard_chrom_in.vcf.gz delete mode 100644 dockerfiles/gatk_liftover_vcf/scripts/test/files/liftover_vcf_two_samples_in.vcf.gz delete mode 100644 dockerfiles/gatk_liftover_vcf/scripts/test/test_preprocess_liftover.py diff --git a/cwl/gatk_liftover.cwl b/cwl/gatk_liftover.cwl index 4d81f55..a942b17 100644 --- a/cwl/gatk_liftover.cwl +++ b/cwl/gatk_liftover.cwl @@ -9,7 +9,7 @@ requirements: hints: - class: DockerRequirement - dockerPull: ACCOUNT/gatk_liftover_vcf:VERSION + dockerPull: ACCOUNT/gatk_picard:VERSION baseCommand: [gatk, LiftoverVcf] diff --git a/cwl/preprocess_liftover.cwl b/cwl/preprocess_liftover.cwl index 2f725d9..521712c 100644 --- a/cwl/preprocess_liftover.cwl +++ b/cwl/preprocess_liftover.cwl @@ -9,7 +9,7 @@ requirements: hints: - class: DockerRequirement - dockerPull: ACCOUNT/gatk_liftover_vcf:VERSION + dockerPull: ACCOUNT/granite_preprocess_liftover:VERSION baseCommand: [python3, /usr/local/bin/preprocess_liftover.py] diff --git a/dockerfiles/gatk_liftover_vcf/scripts/preprocess_liftover.py b/dockerfiles/gatk_liftover_vcf/scripts/preprocess_liftover.py deleted file mode 100644 index 4214459..0000000 --- a/dockerfiles/gatk_liftover_vcf/scripts/preprocess_liftover.py +++ /dev/null @@ -1,78 +0,0 @@ -#!/usr/bin/env python3 - -################################################################################## -# -# Script to validate input VCF file for the liftover step. -# It runs the following steps: -# 1. Check if sample identifiers in the VCF matches provided sample names -# 2. Exlcude non standard chromosomes i.e GL000225.1 -# 3. If the VCF is not 'chr' based, add the prefix -# -################################################################################## - - -from granite.lib import vcf_parser -import argparse - -#Constants -CHR_PREFIX = 'chr' - -#list of standard chromosomes -std_chromosomes = [str(chrom) for chrom in list(range(1,23))] + ["X", "Y"] -std_chromosomes += [CHR_PREFIX + chrom for chrom in std_chromosomes] - -################################################ -# Functions -################################################ - - -def main(args): - - output_file = args['outputfile'] - - vcf = vcf_parser.Vcf(args['inputfile']) - - - # 1. Check if sample names match genotype IDs - sample_names = args['sample_names'] - vcf_sample_names = vcf.header.IDs_genotypes - sample_names_err = f"Sample names {sample_names} do not match sample identifires in the VCF {vcf_sample_names}" - - if len(sample_names) != len(vcf_sample_names): - raise ValueError(sample_names_err) - else: - for id in vcf_sample_names: - if id not in sample_names: - raise ValueError(sample_names_err) - - with open(output_file, "w") as output: - - vcf.write_header(output) - - for vnt in vcf.parse_variants(): - # 2. Exclude non standard chromosomes - if vnt.CHROM in std_chromosomes: - - # 3. Add 'chr' to CHROM if not present - if vnt.CHROM.startswith(CHR_PREFIX) == False: - vnt.CHROM = f"{CHR_PREFIX}{vnt.CHROM}" - - vcf.write_variant(output, vnt) - - -################################################ -# Main -################################################ - -if __name__ == '__main__': - - parser = argparse.ArgumentParser(description='Converts genomic coordinates between two different assemblies using pyliftover.') - - parser.add_argument('-i','--inputfile', help='input VCF file', required=True) - parser.add_argument('-o','--outputfile', help='output VCF file', required=True) - parser.add_argument('-s', '--sample_names', help='list of sample IDs that must be present in the input VCF', nargs='+', required=True) - - - args = vars(parser.parse_args()) - - main(args) \ No newline at end of file diff --git a/dockerfiles/gatk_liftover_vcf/scripts/test/files/liftover_vcf_chr_missing_in.vcf.gz b/dockerfiles/gatk_liftover_vcf/scripts/test/files/liftover_vcf_chr_missing_in.vcf.gz deleted file mode 100644 index c9be755323b1ef54b4df96f02bad3fa903339c53..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3184 zcmV-$43G04iwFn<@8V(r18iw#bZ>TLa$k01W?y4ya$jv}b8~5KXJ2V}T@h*NygMG-N^dU%7wtL*Z5VyMy(kuYg%Q(uyP5-CquunQ$7HzVp zJ~dowY8E9wR|V)%Kqs!N+j>nvF9e9|fQN>z)dY;A03oJ9ZL20=mI99E5|dhW4OS^& z6HuX9c1^%81x%Odj%C$BI;DVN>6QUFYXa_3fKZ}?uQdUpl@hvX>V|F9K@cKIXqsh` zx}$_1C3MFmj%(H>j%*TG?>e?qmpIa?*}CDHMx8Bkx5!CG2r0vsrHqq( zm`lkPE!as-QpA;Y*~G3VNRCUFr3|+PmlPr0fil%BCG+K&S|fVmR&=?fMVBK$_l6|1nEmbDXGFn1JB`2Zi>n)XnI^4)Y$;B0VQcF1}GB%5llP;ed)hc36b0vU5LKZV<%f=+K3sCM(8c`kv*#(>pn&~BS z6l|ce#i&&gqi%~fvm$01EqPQCgA8vD7Li@BUD8sGsZkNL+?ITk>2+WPXv78sj4TGj zNK4D4vY6$VE!b*eR?8-pGsLlB&T7V1aRJ&u%LbGQfsWFWy;SlPvWlDx>$x_`1sMN~ z6GCR*gQnS*cAO+(_lf#9osB-aBw)*o+!O=rl`WfJbQ zFpl~^On3W)EyI+lX_g$8*&zwG>T{5U0Y3ksqIGfa|1vLC5}|UDC1F%kiVxIyeWEhT z>I$z%vqE22=$l3v+pwgna2T%h1D6^`s_(Z~pN?Bg`VcZfcFVg+n<#RVVP3hif4VX6Xaak8o!@}~2Ti^3E=W6F!8b-2p? z$yo9X3g7UFl?SS128{N0Q3{tKlZvG2j7C&?*a-PjCw?L~tku zTPwEBiLV;D8Tg~=uomAYE7KwJy+dt}2@8H_QH|Kq{b=UDBJqSG_KZ;BY#lEljUi@P z+@klx$D3DX;I+s((Eb>}X4wZ}q9WN@xCENv^dE2I&5Igjxpi)DA44G06O)V4?JG!G zus;qwn1A2q;(j_?e6^!tl(M9$3V^b&ccDew^=Rxre1*z?NAYK@0P9Rmfs2Y}o}ED? z-GHZu==XdH`c*Z4H=TU=2J1<3R}6v&bw zO7W6gYff?8gb#niQo?pI?8~aDGh6jvpJtvv{WyEL{WQ3Kjk&3)w2OQ7w?nk#>Y=>u zgCs-#fKR^p)psH5s)4#YY_sqpSp60(A+uhJ0{y-ts`5HyTd3km{0z}axnjftV%r1b z0m56yUg+2qDWgSMvY{1leZ>*+!n5x!cb3EpEuc^d~eGo;! zG%x&sB*A@|9`+dQGL;jy=ED#4<6`(JV8D375&)h;YSa-dUWovhRMBqwwFWzUfmr(a z31&BS{!WPh7jM4*S?jv4D_~8p{n4EuW*djAmnw)c=oyZ#)qkkNPxE5GryXwb7zNk{ zF>I1;u-_wPg_!#8a|m@T4R_%-NZ{`m_8x{JoLXR{P7x|JPA;!;NCn zEO&T=@ta$1oCM2LegZY!;qEGwy!(m3S%QxZ>3c!d8Wt8!NrVNWhHJ#8O9crxgZZsi zOpOfgfI9^uU}nh}EY!M29o#IQLy|5;a-QNV!;!)u$+tz6ZC_Lf$)?~7F^WXz@DT5Y zkDM5@jvB@C)3n?NyquS_sK_wxS@e>7);%FIjCgOVpyXX5g^`|NB!Q1qz@l1xxhQo| z6>EhfHz$f(%Be+g;_k&3Mzmw~dlv}gqFsr7c{%pqH$a8zSFLlmO)!b6z7xt-*a6!R zcmfkSB5wOc_lJ0Q$JSNlI8%Ok*~AAt-xMdVCA#t|?QjAsnp#T(tBAp_uJ5ER%j!}v zoYj@dlVqvv)4b;oX2a1CT{4F8Y(eU5>xUrXmG!*gTwoO zWZ(&2xK>o^p~`}C5P&PZ=YvAjghy#)ijuxWx2v%LkbhHLmiDj3x z!u63?;uOS+D`fW6`<6uGOxvUfAUZZ?8W8N@i-jy(hHE&cU>QamVA(KjiRFpH8Q`9Q z86Q0dioiFtEwT(r#1e%JV-S2_aU8prOTtj$HJHl1Hc9ETNv5UCX5q0}I*c&R`RG8S zH?)ppVj6Kjbt8Atax^)Sgo~oR5=BOjAsb+5h1WdxzKFEsuvc>!mkj)TLC%!d#8hqJ zYi&*V62G)nEU87sKUwJ2K%TMXfBlj+D>W(xr%A&X`OO?wzId79G4Za4g`2{B2KKNuHp68X4Z})*$RB(?k$D+XPa -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT=0.999"> -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##ALT= -##ALT= -##ALT= -##ALT= -##cmdline=/miniconda/bin/configManta.py --referenceFasta /home/ubuntu/ebs/hg19/hg19_broadinstitute/Homo_sapiens_assembly19.fasta --bam=/home/ubuntu/ebs_temp/sorted.bam --runDir manta --callRegions /home/ubuntu/ebs/hg19/hg19_broadinstitute/human_hg19.bed.gz -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE1 -chr1 10613 MantaBND:5589:3:5:0:0:0:0 A A]12:95154] 26 PASS SVTYPE=BND;MATEID=MantaBND:5589:3:5:0:0:0:1;IMPRECISE;CIPOS=-181,181;BND_DEPTH=77;MATE_BND_DEPTH=124 GT:FT:GQ:PL:PR 0/1:PASS:26:76,0,186:13,5 -chr1 869444 MantaDEL:11298:0:1:0:0:0 AGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTCCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCTGCTCCGTTACAGGTGGGCGGGGGAGGCTGCTCCGTTACAGGTGGGCGGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTCCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTCCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCG A 322 SampleFT END=870284;SVTYPE=DEL;SVLEN=-840;CIGAR=1M840D;CIPOS=0,27;HOMLEN=27;HOMSEQ=GGGGAGGCGGCTGCGTTACAGGTGGGC GT:FT:GQ:PL:PR:SR 1/1:MinGQ:3:372,4,0:3,14:1,3 -chr1 1530481 MantaDEL:11359:0:0:1:5:0 TGACAGAGAGAGGCAGAGAGAGAGAGAGAGAGACAGACACAGAGAGAGCAGAACAGGGAGAAACAGAGAGACAGAGAGCGAGA T 602 MaxDepth END=1530563;SVTYPE=DEL;SVLEN=-82;CIGAR=1M82D;CIPOS=0,2;HOMLEN=2;HOMSEQ=GA GT:FT:GQ:PL:PR:SR 0/1:PASS:602:652,0,794:27,0:110,24 -chr1 1598413 MantaDEL:11364:0:0:0:0:0 ACACGCCTGTAATCCCAGCTACTTGGGAGGCTGAGGCAGGAGAATCACTTCAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCAAACCAGAGAAATCCAGCTCTGGGTGACAGAGCAAGACTCTGTTTCGGGAAAAATAAAATACATAGGCAGGGCGCGGTGGCT A 999 PASS END=1598580;SVTYPE=DEL;SVLEN=-167;CIGAR=1M167D;CIPOS=0,18;HOMLEN=18;HOMSEQ=CACGCCTGTAATCCCAGC GT:FT:GQ:PL:PR:SR 1/1:PASS:71:999,74,0:0,28:0,26 -chr1 1649485 MantaINS:11457:0:0:0:0:0 G GCTCTCATAGCCCTTCTGAACGGTCTGTGACACATGCATGCTTTCAGCTATT 887 MaxDepth END=1649485;SVTYPE=INS;SVLEN=51;CIGAR=1M51I;CIPOS=0,5;HOMLEN=5;HOMSEQ=CTCTC GT:FT:GQ:PL:PR:SR 1/1:PASS:82:940,85,0:1,0:0,34 -chr1 1649680 MantaDEL:11457:0:1:0:0:1 GCATGCTTTCAGGTGGAGTTTGCTCTCTCTGGTTTTCGGTCTGTGACACACGCATGCTTTCAGCTAGAGTATTCTCTCTATAGCCCCTCTGAACGGTCTGTGACACACGCATGCTTTCAGCTGGAGTTTGCTCTCTCTGGTTTTCGGTCTGTGACACACGCAGGCTTTTAGCTAGAGTTTGCTCTCCATAGCCCTTCTGAATGGTCTGTGACACACGCACGCTTTCAGCTAGAGTTTGCTCTCTCTGGTTTTCGGTCTGTGATACACGCACGCTTTCAGCTAGAGTTTGCTCTCTCTGGTTTTCGGTCTGTGACACACGCACGCTTTCAGCTAGAGTATTCTCTCTATAGCCATTCTGAACGGTCTGTGACGCACGTATGCTTTCAGCTAGAGTATTCTTTTTTTTTTTTTTGAGACGGAGTCTTGCTCTGTCGCCCAGGCTAGAGTGTGCAGTGGTGCGATAGCGGCTCACTGCAAGCTCCGCCTCCCAGGTTCATGCCATTCTCCTGCCTCAGCCTCCAGAGCAGCTGGGACTACAGGTGCCCGCCACCACGCCCGGCTAATTTTTTGTATTTTTAGTAGAGACTGGGTTTCACCGTGTTAGCCAGGATGGTCTTGATCTCCTGACCTTGTGATCCACCCGCCTCAGCCTCCCAAAGTGCTGGGATTACAGGCTTGAGCCACCACGCCCGGCCTTCAGCTAGAGTATTCTCTCTATAGCCCTTCTGAATGGTCTGTGACACACGCATGCTTTCAGCTAGAGTTTGCTCTCTCTATAGCCCTTCTGAATGGCCTGTGACACACGCATGCTTTCAGCTAGTTTGCTCTCTCTGGTTTTCGGTCTGTGACGCACACATGCTTTCAGCTAGAGTTTGCTCTCTATAGCCCCTCTGAATGGTCTGTGACACATGCATGCTTTCAGCTATTCTCTCTATAGCCCTTCTGAACGGTCTGTGACACCAT G 999 PASS END=1650642;SVTYPE=DEL;SVLEN=-962;CIGAR=1M962D GT:FT:GQ:PL:PR:SR 0/1:PASS:999:999,0,999:80,9:106,35 diff --git a/dockerfiles/gatk_liftover_vcf/scripts/test/files/liftover_vcf_non_standard_chrom_in.vcf.gz b/dockerfiles/gatk_liftover_vcf/scripts/test/files/liftover_vcf_non_standard_chrom_in.vcf.gz deleted file mode 100644 index 63c9e172ea63a2d1cbe4675c8e56a2636fb49ad8..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 8441 zcmVF!h18iw#bZ>TLa$k01W?yb^ZeMeBVQyq$a%5j)XmW3DUukYG zc4KA$%v)`5+t!l)%=`-B7TBhMV)ML`lfP9#W+0r1`LMEYsw0&di*dXI@Bu_ucJmUfwQO57W)z`|RYC-V-mrqmxI|O?fCo zDZ@yHaBjVPT3wfidATT8aOvuOHeKAzZs2e|5_@(JX;$TJxhfYxh8!mQ`}_CHhjRbv z*$J3)jF`k!;yUqQf)QL2qmExy1fouqrND>vP4uPr&L=uCBT7?|~ z-4F<2w2W+rz|;hBm{`oFB_34*j@(P7L_;MbaQ0p*5&Bd%?w3n>D}qN#-A`paDh;0B zZwOB=WyO|9FqMvcD!mnlj>=!rLL~K3iNxm>iKy!!PA=tB>104IZ--P2Fx5}x+bcF< zuk<2m@k+!1uh^qXqJe$X+NVlnzZ0q!uS5;Fs`Okn6ybn>X`jk$0imO6@v^3WA4AU) zT7YT#bqsx~L=DJg+f)Nq6@EC10lBp26Iv-fAeXPBGIBsJUwUPe0ekr{VdAX}=BRwR zOfn!>=X)sh0;GX~0Q3z48D+qGrKc%) zmDnoQ9XC$}DF5`9LC&)eVHnx4sU#u#82?VsPu>#E>Dk5QadvujJU)MV_*kwE!*TKX zu{=DPPw&P@<@$Oxd)&;Hi^G4MK0L0<>)E;-tv9Qu>&??@Iv;(Su4Yrb{*M;dtM|n} zFKU*8Szc{cvqgs~UKlNJw^%MVU50Ot&ntd=%0 z-^+3_x;#D^L5ns@-xQg;ddDyK(i z?+4`eaaB%#D;FKZ9=|_6FP`Ce%&7c?qC#)IyaqRhJ=-h?_`ZI7_Dm0a!#)SxKTaW7 z9;dU_2#vj&U4xmS`(MtNcb`Yoja!}8dtAasthbiDI)3*Iqg<8$exl9%`)gjkKRv(t zVnt_*b-C(80bus!DhzOYb9_>K_yU&yy;%N^2ynX@t-%&8&g7lbg?9q3J}rK8x1e6_ z#V=3u*I%*vYIb-3Etdb!^P>+(#}~!x=b+PNIlTcGdW5}!4WBR31vabcVvUEiTEB~9 z5PtX@E3L|hCB$WWsU4yEUq7E`#p&Dg5AS|X-#llzQ%2gW_oIJ3Ev|_@!0Y35wL$xV zJt_D{FK6Z6XgYd$nr~*WrZ@kZUV~?St^oRnF{ASBtegXiua=)+chaswB;H=jJ`=Bw$$;~We;!?Z9SeSeW&U44(K0ibF_8v(8a zIl@t_NB7fDh-m9Ih|SvqySFATThhOKjvH0tkKR2&y4rm1a=ECTG2g-E94Ex{*`MXj zcyw{UDsNt`A<)pYv){`3?0&ff)1$n1Zn-t&83>l!*`lfq%F+AmWVC*Ss_J&uzaJNu z&q3m=$LV4LmL}i_Nmdw_>!(ND?An!A<;a-v?EY|6p;`ovcGhfbbKDUK90X|2^Z$|$y zI{Nvdiud)HR%~_w!e9wOGM_#^qRA?<(aYawfMe^~!)!iX!OuU-AD}40ss&2wHHr#} zlW1;ckGT4%`~$XbFF|F)`Uf`E zE!(PHUeUDt6ya9-D<>|vzY;sekEnotZJc}8C#Eo?m&|+w5ip;DonQ$k5v{$&`}OkSvb=3G$C~ob&v(lw>~E@$ z>l#&g7I)~ts-!lMz*=Mw)!j&%mzypX6lYy#ekEBm`}ty0q~}M+XPsUYz^Z&09X)~E z9vWXi+{^*95BDEtiy1({&2<0cY_WfhJ=EP9xaV~5@$=}_tJ(}F_Ayb>o1Vy{t52?fLHdQh26XR^Tl`B z>&v%i0W_Pz=}`b3bO3e?FbD#6kpL@{fITy~O3yCd9E%qs5TO++Ap1Z~IWZ-OltGm;gX8m~!#E0MtPdO}K=>Wd81V2_tiuDEHyw&II6iU! z5RPSZz=N7YK7wBTaL1}0B$!-X1_Howme%Lp&kR3;5Ktxqs~1Hb zGi)1DxybNJ2C8RSy;HbB1(G0XMpi(QG-JCES7e$Y%gW?H07rPP0EWm5{}ud^!MF44 z3|A;n4sbeXEjaVARg>7nVUJaXjcT*Pabs5zSBB*ywCuL_1CBH6n7)R)7VBi=( zELz#>M8}4$45bZNS)0hSavqQcNHbWbK+VBKpc}5uUWP_Qh(ZhFCKN>w$5F?Zc%e#b zSjuT_M6I(Cr?twa(%94rqZqq%j3LqMa2y+SBhpi8r=pER-;r2FPOl>HrN=Exacfy? z+WUfc+EX~|D2%tHg}cF7D(kaU=f>x*mD4Rny;gNgXIF}?8?%m(JJu?`{Fb3rI;Kds zorZ2I`fb?yM=(WZ%A6o@l#{<#H{~vd@n=peQp!jXh|`p_2kr*lgR2#lOc*|RCFSc5 zDJk63tU~aNZbwj{b!pku30Lp|N=||jP~Z}nu=0Yn2WGr6Xk=bA#VsSF6X(pA0ucz8 zB`I7Up?f=0WV@Vi&>^}O_jY&d)@;YEJKZABDp=@Ws~sW5Utmd}bS)o*8vv%-7;8;v zwd}#z#5So5j*oVwCtyja8IJKNh947XIL6AnXkMELA{*Jb0T-DVOzQN7ISO^cj7$3e z^*`U=vn_w83(ngq_Yl>9x99Bb*?_l`NF8s_YHu%wy}fbw+;?}zCzdK_@qKsCoV(|Z zyBCA*p4aZ4S7JaGm9WFDoDxN067}q{l1aVCaO~yLI;)5#t+aP&mp=$ACrYC~Kn|a% zc&y|^L}MHJ{t$glSE}V;Qwl;%q?FFng*0Q;R>~S>8l{xM?5RDa@?Tgh=(C0Rg@Uo(svU~88HGDb z?1G%%c}c-VT|vH%PlK!M0D~5 zmMC(NfxyRawj|a4^(ly|yb6t81?a8S5VdxOQd{~&p!A7^fbxZp@x5kWcoiA<5l&(> zH%4yC?Qy}yd+ss{I!BeB=r_tNlwmmFa}Idw6H-n&S&`St4Lu8;3@eI)a0&@%y_7R@ zmNH`US)xiahGDv&XsJ9QjATIzM)cH4>O{+2GQ)nP7L@oA&=%W*;$5J znw_;jw z_7=RsG|S4_+L_x-ZoRl|<*p-jnvhYr9V7>+L?gn2lNfa_1Zwo3qa^2@rwz!+G;BB0 zw)Pw{c*=vcYAfky;o1Y2W%8nOE80EEmyB#KY4IXXDCZnx4(hzgsVH?(KU;9uQ|BYL znFC8VBBh*^Rztypho))eA?)eg?Nt6%HRH&m+mps7iu&fvi3hE4yla~JB{daPeq#fl zN!_2&3+sUqq4fGy5m})Vf2>+XYU(V(`&8w1Mjh9k)2y3I4f0TS0Uoe8XrK_}vmBfm zGh{z<{iwquj<~{=aG7(CyefR$wAYWbu#)`%W`Z8Ma$VwXqmZkXF1X>s_Bk5A*O z%Ev!u2=EU8e20V-nOINbEO(Vn#seva-;5t;(6oXoBA$`(s3hZu9W)rh0SQ^Bf^!0a zoP(Wljp|{}s5E=Ya+jRkz=O*zw*psRRM6p#RmsK)$_+P&Ke;;RV~iE3X+&r|)7vn% zeU(-cHxB`lxG}!2jIW!AEBVeA5!M3$KyQm8Ef(~sb`pou zR@Qe7q7tH!U1@21Wsii}9!@}S8pp0#d==|)Jp6DRR)NA%jB#`iEvg;M(3jTAQ2Ecv z!z8hwA{9p(i=%qpOTM zeO$~VabZS%9UXCBS5%_9k)AIqCwkW_=Pu!;98h`H zwPktL*QMQFIctl`HE3g1_aMr%D7S!Zi)NKY3$Oi5J7PASa_Y68RZCx6l%HADVnV_d z#`bkK2d($P$c3#my039GVKX~~V-Ndm4Su!?z+rdW8J5OQWM~~KLSH#C&?`yndwOCb z-5Pxe0ceSzs_F!aYIGw9n>mP%uy2qP8T}Vw8pY!%9t_h2I$({{LW%Rcc%1m63k|@Q zbYzb-eMTMGZPI<@tYxhF=Jq0iR&HBnI|dMSGAAI&u*@n6ClLv);t0V|x1dFs7_x8A zgbxiLy3eF<$ft~GJ>0cY_f7f29+L|t#_(M!AOP^dNW&@^G0`ESBlL(93z|ePn!n=< zv{bS7C(qI}jpIas+`mXMNdWMH$uC79BK?fXbBjsO=vfIhhHq~Kn}pp7Y-9#?(h;N_ zjgtaBF`&J#5a7EGO)EQvR`UnR6mWr03EjQk;A-~zbkRDI<@FOj%=LUacmFH2adiHR zG@T3l7JyG#5u}-W^iQ=34wj(Qw9<2sFfskP)2q3}Dg3r`!eS-5B#se8jL#rs2)T3I zatC3+wY(=0;L1^==u>;;K0AemjIPu#X@Ga|3#mKj_C37%-jxH%KtO*yFMD2=8xC3# zO>6{OTEIRnyg>;oXh7^1(8MB^?lYXo3AmlVnQ5NY8&&ulmu2_oXA@y&eI_UgJ^M;-YB}J^6MykAa#=U{!7uzI1~a zH?D_s7A-4m6es>mA#sHOG5O4oaQ3G_C~p!A_r%TU5T|Xezoj~K^A0!p$oylTB&5`( zW06!GwEq(7u)Zntj;gO}NK({S>1byLM%RsTqN`OhfiRj#(*zG$K*x#iTSDr%@^fmE zQ#b9;-D^X6H6AS7wX~YaaMPSQx8v8YsDRDcHWYfy@ z_U+e?KLr)aRMG~Q+|te$x@9fAPE~s&RVy+h7D?I=(3CdJH07Lz?E!W4Fw;u}?Ssq2 z^Ax(D>vm%N+5JJ)DmyI-|G>radN~9DAB8xEoQ0Xo@_*iz&+{ zRWyo;NjEXqi9e)PV@z!bB}Zh_S-I;XvHYkt4pRa2!Ay_H5xgW<^B*t?B~0OQ3Z+g* zkVs-CQ-vNR_Np*XO}7svDW^lA0E1U{tWw!+tG{2)AJV4 z-#9O=tTN-oOdiLBpkJWk@p_)MMENOlz|)R7yX-CDFHPod73o}u7< zm+DND>WuAV*mBukL3FJ2WQbu8iP%^;X?eO|P*&+Nsge7j-LH$oN9mGn)f-&cl{SCYy$42m^(B|rF2W7@S)1Pm8VAIzXtw6`|(t3YZu0;IqlW6*3O~N zrPkK;woa>Y^}`)_1q2R`bgoA_TcWX5XYFM=Nk=u6TN-}(QPlpWkv9q6rtE)VYT7J? zIYDkSOZ8rUP1n}lKV-Xq2tMH57-f=uy^9c}ZHv+r(*X0qZH)80GZZhw`KZPDMD#c} zRXfp|6-J9O%<%F!`BQ7(9Lx*%=#}Zl#PXN)9AhRX(FqzIP#`d$2@#o zsbTmohm~rgZ8VEqsVtP|u97?kDlcT3K4!*bcWbNL1gfepTStsfMA|$)VsRL7u9cY~ zK7@^;V%WbJDoVs5Bu0RZBQl#U8Te4(DO{3>dUWqwfDcY9ToZkZBhmlBmwp`Go3B53 ziqa6DVUihNAfV)NO*DMj0gfAzFYKKtopTNb#+7*#{-(zd)#Hb1@k4d>>kt#u+xX$+ z&;%f{P*;MZ77Q~@0CuArnXWL_6YPLxb&FxZS;M~kDcwA7Hz_Q33&$@#QKeK{0B;1= z&qN;(EI_qJO-GW77%yD>^XwTeY&PIRRG>y}=VG@%o8V+9;&DG~YX!ZEelM=(BJO2U zeoyi}bLGn};QE&8ANh!rQT>KLw?%jJ1;c74v)ZaAbs5)w!|&cfxJrAZ5aN`By9m1u zjvD}kdG8rGqhW~q$QJLWA%ttYejD7lK6`E4?=8e$tiJ~N=|cKNay~~K?(!qdKa9UO z6y*14swQn!_;PT<@<1;0G|kj%iz$BKg{jk4rU}Kc`3+@8_`_+A7h2UC3Qmhs;~c$t zl8B!XIgEsGxHx5@l=t(@tgem-8W07Ki8g0%Gh_*?xIW_>@{R&dE%73pZuAckdx5mwx}bYkRud*b0pv^m`TP^j}y6 zu=ql@#nqgLkN4Z0iD^=)okihbR;YuD6U0bi6$0>?&}k*bz`oEawk&P2*8W_-!iPa# zst7lkFBirEN7$kxs<;_!D3q2W0r(L2fMEJVm>}s~wL3V;{xX1M6^?!ODzB#-DgO5XLod+nmw30-xQcrxLg?9>@%J1N&&zui2ZcPs zJaol{)$>;j}Zp8eTm zFFg1J3->AwQrFeRraFpqXko4`%5w{|^hMPc=0g-M@>Y&5%#80DQnA;czp;T6@mpzK z-|XfWOYrjx-S)KgVO?dy$Sbsl??%}??7TpF4vFKYJ6tVkGW9!wl3xbYu z6sj;LY->&d(OZo#7iv_aiFYzAEVCuwH~t0cVdq}{5C{dsR*0eKF)cQw=@Q){r9 zDM4j5?uiIs(9DS_^aQN8gFQ-8KV9OOCh4x!gUaWYEP$tYe6M3;HK5!~lu=u^GL8DK zIK^-Ot~t$_%92x3`>)t~Obp6v%G#S#(vF**N+oeWvWu)4xe9NXT6c*k#cvK4WQ4Ij z#PKbTIsW6?-^`n=u#K`@-6Aix_N|Tmen>k7eT_h1o%B&f@R zAwEav&gbwF;j_f&C33aBkQ{=iupZ(NCKlpkU*}QX?tF$W(WOt(vVXNV7+G;|lOUJ-t=L+I$!;HWA>ZB^nVR#k+P+pU?uKStGoRf>a&oZ4N}RR5II zkrRfKSL|8tF)xRIMjq`!p7kO8nGzax#0O@PkGz5adWt!uB{Nlr00E4aSIax3do<3Y zHO5FI$84`SIM%V+ayFj_K40b9tQyVpIjz7@=%Q6f?|kUzQN3F+AVYZKrL^C%YkPO9 zd!r1aeq#oww6~LJRd_uZ*>HpDA>43nVPbI5eDu=2*tf@V_g%mz=2a>8VL6sNq7CaW zQYESq69Hlv17v^S6G6+XczB9D*P<{`FPGRbxc3KvS8eet(jUiZQ_fH2G>fD|7E|f3 z@HtcCX^Bt=t>Q=SP$|DQmAozpd=wJKxF8A+OQHn{Lw_nQNN_RT79=KlC9ZrUa9Eoh z7&){Da8wsY-xG(Z!7S)92F6f}E^IY4VaDLyD*K0?{KH_gZpyChCI0K|;loe~lkMP0 zoKwD9l6msl*{PFUo7PjKxov>6h3xP)8QzY}V#_Jvn|*O9bzOZ$(9&mA8s$mrmx9SW z^VfDEKsv^!S_AX*u@bsu+y8Z8(>6z)r_7-I;MpY!^(lXN?fudfrjyk}CacuTqWk1r bo>P54r*u~FVNB=-L$2e0w2P}0{cZpNJyTq@ diff --git a/dockerfiles/gatk_liftover_vcf/scripts/test/files/liftover_vcf_two_samples_in.vcf.gz b/dockerfiles/gatk_liftover_vcf/scripts/test/files/liftover_vcf_two_samples_in.vcf.gz deleted file mode 100644 index c63e287e86de7f73db7c073636614665cc670c0c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3202 zcmV-|41Mz-iwFoD^5S9u18iw#bZ>TLa$k01W?yu7Z(nm^ZE$R5b6;t0E_P#P0L@u# zbJ|D}{;d28)u!q;slr$@ddJIFaRL+WY=VvAy8W=_l3~Ew1&Nh}vrgTQzwQ}HjD*CJ zY(AV{K-TnhKmBx1cMHFLy9zgK6(`#u>pytcFGiQVdCPAOgN*g5MyX~~4eq7!K3TB- zm_;msrTK#wM9Xjqm#Ip+Ru^Rwwqgm3KnykXE-x>i<1M?~KkcJzf61QG%jY$5`OD)| z5(mpLO0zKAXYBGryp12zU>C9|eGJl+ZJ#!;#O)nt*!{Ae89fYfXS?rG##px?vl25QIn)nr4}# z?kJ%L3EeS?PDB@T3Iwr;qlQD=)B_|&MW6Wy&lOG-jrO*akOtkXqG zLLEX}laeDFC84&X*)}zfV3veJM5yCBbrZDY)6xN^N$VzPNvGiu>K=1NOEyiSYldmn z>Hg0lD@VxBC66j%km1e2BC-p%OIoTiH7a73+mdfGy$*~3jo4s-k;Pyb zX=#~M7PB0)1zSzbYB{8GhB!9NS8)HK`D4wEFDK2iTB)9a61W^y~bA9<7cs59MfpIFk@I{xd9^{+R(fD*RTTSoWE*NWC9jtMG`X4sLuk*lgH_rBySd z>97X6(Z~pN?BX=UzYu-)%nI1*k_$euCECxP!&C*<;$&GhO6)WxxYGL8~lSJi{%p5Wzz+ z*jlnpPJGqK?Z6*RhPC)MS(yxx?=5P3N?7nai)zG<9!69DjKmX)*b731XX|(YX$&#T z;ugIhKHi?0fj1)OK>Jewhh-OpiHc-r;R0xer~h~puV2+5%dK;JyBGqI9+{kv?#>`( z&i>f*VE%oZ^M}cF{?(3#QOc60Dges9-h~!zH=}F+@hepRJBmMJ1z2Tj3S3k)^Xv>F z=?1Lsqu=u_=vUSF{bc;%8>}bc`uTg9|L1i0I2_IV59d^;dloFA4DBE`5b#ZmDUc;W zl;S0~)|}#a2p|83rG#x`IG0sZCywgBK21G;@^Sij_i1o*j=71bwDSk`w|%tW>Y=>u zf+R!!fKPt-)psH5s)4%QZ?fr$*`~uNQxnjftVw*kV z0m55{?NH4jMN<*2X!Bb2Wioxx|GI-lFDyyr0HkWXgTAEyhz=xd7bjSR3i6IjnBQ)~ zJYnMc<2}S)Ocv+V&M}MP3>NE~ug}>$54O7v5FBD#=%_!=2J`uk*czZzWjqj&O3)*6 ziBx?KUa+F2DZmzCf#{`N%Mtp!bJD1QuiovUU1hIzEweHi$0xK*@e|@r_=PPy>g+jT z%Zn5a8d4kn&Nkt590T=;cb%I{q0hi!S%pzC8!+|3yH?X3OjWC}IUck7b1LzC7eo;- z%?m#uN$^~z`yB?mOyz{F`0xY$xEOv47%-l&1c0ZI8g&HoGZ6rjD%wrI)?kNk5L-V# z!|bNc-U;#l;?4IzYhBlM1+2-9Ke`vhY~pbFS_N?pdWNGL^&jf+)2uk}X@^@pL;;RL z42NVB>~=_5A*R0j96}vS!)>?;5_tc@-osFYQwxmLDME$D$z_(|4y~)~PY7Km#q9v^ zHu!Uhb+I;_@=^)uy9Uo7xiQ({^#92bZrKumsEg+W+I1RlSzcKq25{mCNmHL@AQ8@Q2Z2Z`*m%P8QbnIQ{K$GAb|Gw(GbS|Eh2(HF&6i1 z4PK;Qf#rD*`yX+o3y-kf^_Mqqy$|;vZxwjhR3<|Oo}d-rQNcr?h!jz9LQzC!D)YhZ z>}Evrclw4XM6-ylaGqec?3t$H_Vk|F)5NzjPzJve+H*}}8ow&kQg9$p#0LmC_g;x- z!+x6va#enWamAmi`xnF^9r(Q}XW71OUsb2uKB0y(_IuZUZ+zFA-SlSnigrnQXuLTo-#Xn)MZS9oK{}bQ8OWeO#9H3I@lqyA2RYucc z-?2677*~09IEHq`KauK_Teu$Pr9taZ`$~M2(-&9pG~YjAwN@i~^LvFr zG2q=6ijZ#8j?vM2dWRT2($P;YVlgyz&2WfZEp*crbqp(-;*Zg`_#T&6(znNd%N4xf zXz{N>c|U*m@Kx~@OQT9Lk#gP;hiU0e#L)w9RHCh9tt4&Gz)e{w!D7!cDU?dvHG0$r z8VCV3#u*}8AZ>)stO#8JqVXS7(j-BzaB)*_scUU=;N&T9$f}$QL3PlEs*q5=YNh4*|=h`R907cU2wuiw&UrAEczIHmX|znR0z z7cYaHCNBp9S7UxyicRCZ0O0$WTjc#}fL&lP5UmTN!S|pwzrciN4_5|aJ+H*PRt&tN zeRzC3RupcRn;qofd~t!uIZ$3@&;)kNu`j#_Wg;+0vM@Tpka5zvW=}?e0MhWrt%52502<^e9RL6T diff --git a/dockerfiles/gatk_liftover_vcf/scripts/test/test_preprocess_liftover.py b/dockerfiles/gatk_liftover_vcf/scripts/test/test_preprocess_liftover.py deleted file mode 100644 index 93ae61f..0000000 --- a/dockerfiles/gatk_liftover_vcf/scripts/test/test_preprocess_liftover.py +++ /dev/null @@ -1,94 +0,0 @@ -################################################################# -# Libraries -################################################################# - -import pytest -import filecmp - - -preprocess = __import__("preprocess_liftover") - - -def test_non_standard_chromosomes(tmp_path): - """ - This test checks if non standard chromosomes are not saved to the output VCF - """ - - # Variables and Run - args = { - "inputfile": "test/files/liftover_vcf_non_standard_chrom_in.vcf.gz", - "outputfile": f"{tmp_path}/output.vcf", - "sample_names": ["SAMPLE1"], - } - - preprocess.main(args) - assert filecmp.cmp(f"{tmp_path}/output.vcf", "test/files/liftover_vcf_correct_out.vcf") == True - -def test_missing_chr(tmp_path): - """ - This test checks if chr prefix is added to a non chr based VCF - """ - - # Variables and Run - args = { - "inputfile": "test/files/liftover_vcf_chr_missing_in.vcf.gz", - "outputfile": f"{tmp_path}/output.vcf", - "sample_names": ["SAMPLE1"], - } - - preprocess.main(args) - assert filecmp.cmp(f"{tmp_path}/output.vcf", "test/files/liftover_vcf_correct_out.vcf") == True - - -def test_wrong_sample_ids(tmp_path): - """ - This test checks if the VCF sample identifiers match the expected sample IDs - """ - # Variables and Run - args = { - "inputfile": "test/files/liftover_vcf_chr_missing_in.vcf.gz", - "outputfile": f"{tmp_path}/output.vcf", - "sample_names": ["SAMPLE1", "SAMPLE2"], - } - - with pytest.raises(ValueError) as exc: - preprocess.main(args) - - - assert "Sample names ['SAMPLE1', 'SAMPLE2'] do not match sample identifires in the VCF ['SAMPLE1']" in str(exc.value) - - - - args = { - "inputfile": "test/files/liftover_vcf_chr_missing_in.vcf.gz", - "outputfile": f"{tmp_path}/output.vcf", - "sample_names": ["SAMPLE2"], - } - - - with pytest.raises(ValueError) as exc: - preprocess.main(args) - - - assert "Sample names ['SAMPLE2'] do not match sample identifires in the VCF ['SAMPLE1']" in str(exc.value) - - - args = { - "inputfile": "test/files/liftover_vcf_two_samples_in.vcf.gz", - "outputfile": f"{tmp_path}/output.vcf", - "sample_names": ["SAMPLE3"], - } - - - with pytest.raises(ValueError) as exc: - preprocess.main(args) - - - assert "Sample names ['SAMPLE3'] do not match sample identifires in the VCF ['SAMPLE1', 'SAMPLE2']" in str(exc.value) - - - - - - - From 141fb758f95f342479c74c68a8396c8a02ba19be Mon Sep 17 00:00:00 2001 From: Michele Date: Fri, 12 Aug 2022 12:12:32 -0400 Subject: [PATCH 14/21] Update Dockerfile --- dockerfiles/gatk_picard/Dockerfile | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/dockerfiles/gatk_picard/Dockerfile b/dockerfiles/gatk_picard/Dockerfile index 389782a..4b4bc0d 100644 --- a/dockerfiles/gatk_picard/Dockerfile +++ b/dockerfiles/gatk_picard/Dockerfile @@ -16,4 +16,11 @@ WORKDIR /usr/local/bin RUN conda install -c bioconda -y gatk4==4.2.6.1 picard==2.26.11 && \ conda clean -a -y -f -CMD ["bash"] \ No newline at end of file +####################################################################### +# Setting env variables +####################################################################### +## Supporting UTF-8 +RUN locale-gen "en_US.UTF-8" && update-locale LC_ALL="en_US.UTF-8" +ENV LC_ALL=en_US.UTF-8 + +CMD ["bash"] From 1ab4e9ac58a2218769f59ee5efde6e4e3f785267 Mon Sep 17 00:00:00 2001 From: Michele Date: Fri, 12 Aug 2022 12:15:00 -0400 Subject: [PATCH 15/21] Update Dockerfile --- .../granite_preprocess_liftover/Dockerfile | 21 +++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/dockerfiles/granite_preprocess_liftover/Dockerfile b/dockerfiles/granite_preprocess_liftover/Dockerfile index 95b65d1..9ea03b1 100644 --- a/dockerfiles/granite_preprocess_liftover/Dockerfile +++ b/dockerfiles/granite_preprocess_liftover/Dockerfile @@ -4,6 +4,13 @@ FROM cgap/cgap-ubuntu2004-py-38:0.0.1 LABEL mainainers="Michele Berselli (berselli.michele@gmail.com), Dominika Maziec (dominika.maziec@hms.harvard.edu)" +####################################################################### +# General updates & installing necessary Linux components +####################################################################### +RUN apt-get update -y && apt-get install -y \ + locales \ + tabix + ####################################################################### # Setting working env ####################################################################### @@ -12,11 +19,21 @@ WORKDIR /usr/local/bin ####################################################################### # Software ####################################################################### - ## granite RUN pip install granite-suite==0.2.0 +####################################################################### +# Scripts +####################################################################### +## preprocess_liftover COPY scripts/preprocess_liftover.py . RUN chmod +x preprocess_liftover.py -CMD ["bash"] \ No newline at end of file +####################################################################### +# Setting env variables +####################################################################### +## Supporting UTF-8 +RUN locale-gen "en_US.UTF-8" && update-locale LC_ALL="en_US.UTF-8" +ENV LC_ALL=en_US.UTF-8 + +CMD ["bash"] From 5799f95811edaae5d6261d1c3609a8df14ff1269 Mon Sep 17 00:00:00 2001 From: Michele Date: Fri, 12 Aug 2022 12:15:42 -0400 Subject: [PATCH 16/21] Update Dockerfile --- dockerfiles/gatk_picard/Dockerfile | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/dockerfiles/gatk_picard/Dockerfile b/dockerfiles/gatk_picard/Dockerfile index 4b4bc0d..107d6af 100644 --- a/dockerfiles/gatk_picard/Dockerfile +++ b/dockerfiles/gatk_picard/Dockerfile @@ -4,6 +4,13 @@ FROM cgap/cgap-ubuntu2004-py-38:0.0.1 LABEL mainainers="Michele Berselli (berselli.michele@gmail.com), Dominika Maziec (dominika.maziec@hms.harvard.edu)" +####################################################################### +# General updates & installing necessary Linux components +####################################################################### +RUN apt-get update -y && apt-get install -y \ + locales \ + tabix + ####################################################################### # Setting working env ####################################################################### From 6f2ede0f8d83c33367a6c687227830f5ea2dad7e Mon Sep 17 00:00:00 2001 From: Dominika Maziec Date: Fri, 12 Aug 2022 17:45:06 +0000 Subject: [PATCH 17/21] granite_preprocess_liftover Dockerfile update --- dockerfiles/granite_preprocess_liftover/Dockerfile | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/dockerfiles/granite_preprocess_liftover/Dockerfile b/dockerfiles/granite_preprocess_liftover/Dockerfile index 9ea03b1..4f9050c 100644 --- a/dockerfiles/granite_preprocess_liftover/Dockerfile +++ b/dockerfiles/granite_preprocess_liftover/Dockerfile @@ -19,12 +19,15 @@ WORKDIR /usr/local/bin ####################################################################### # Software ####################################################################### -## granite -RUN pip install granite-suite==0.2.0 + +## granite, pytest +RUN pip install granite-suite==0.2.0 pytest==7.1.2 + ####################################################################### # Scripts ####################################################################### + ## preprocess_liftover COPY scripts/preprocess_liftover.py . RUN chmod +x preprocess_liftover.py From 855cbfbcaa8a43954adf1865954d52e7e6a49899 Mon Sep 17 00:00:00 2001 From: Michele Date: Fri, 12 Aug 2022 14:21:05 -0400 Subject: [PATCH 18/21] Update Dockerfile --- dockerfiles/granite_preprocess_liftover/Dockerfile | 3 --- 1 file changed, 3 deletions(-) diff --git a/dockerfiles/granite_preprocess_liftover/Dockerfile b/dockerfiles/granite_preprocess_liftover/Dockerfile index 4f9050c..8a546be 100644 --- a/dockerfiles/granite_preprocess_liftover/Dockerfile +++ b/dockerfiles/granite_preprocess_liftover/Dockerfile @@ -19,15 +19,12 @@ WORKDIR /usr/local/bin ####################################################################### # Software ####################################################################### - ## granite, pytest RUN pip install granite-suite==0.2.0 pytest==7.1.2 - ####################################################################### # Scripts ####################################################################### - ## preprocess_liftover COPY scripts/preprocess_liftover.py . RUN chmod +x preprocess_liftover.py From 8ac6d7aecd651ba9bd76b8eb29f0c8b5fbbb162f Mon Sep 17 00:00:00 2001 From: Dominika Maziec Date: Fri, 12 Aug 2022 18:29:36 +0000 Subject: [PATCH 19/21] preprocess_liftover.py reformat --- .../scripts/preprocess_liftover.py | 54 ++++++++++--------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/dockerfiles/granite_preprocess_liftover/scripts/preprocess_liftover.py b/dockerfiles/granite_preprocess_liftover/scripts/preprocess_liftover.py index 4214459..6e038e9 100644 --- a/dockerfiles/granite_preprocess_liftover/scripts/preprocess_liftover.py +++ b/dockerfiles/granite_preprocess_liftover/scripts/preprocess_liftover.py @@ -2,11 +2,11 @@ ################################################################################## # -# Script to validate input VCF file for the liftover step. -# It runs the following steps: -# 1. Check if sample identifiers in the VCF matches provided sample names -# 2. Exlcude non standard chromosomes i.e GL000225.1 -# 3. If the VCF is not 'chr' based, add the prefix +# Script to validate input VCF file for the liftover step. +# It runs the following steps: +# 1. Check if sample identifiers in the VCF matches provided sample names +# 2. Exlcude non standard chromosomes i.e GL000225.1 +# 3. If the VCF is not 'chr' based, add the prefix # ################################################################################## @@ -14,11 +14,11 @@ from granite.lib import vcf_parser import argparse -#Constants -CHR_PREFIX = 'chr' +# Constants +CHR_PREFIX = "chr" -#list of standard chromosomes -std_chromosomes = [str(chrom) for chrom in list(range(1,23))] + ["X", "Y"] +# list of standard chromosomes +std_chromosomes = [str(chrom) for chrom in list(range(1, 23))] + ["X", "Y"] std_chromosomes += [CHR_PREFIX + chrom for chrom in std_chromosomes] ################################################ @@ -28,23 +28,22 @@ def main(args): - output_file = args['outputfile'] - - vcf = vcf_parser.Vcf(args['inputfile']) + output_file = args["outputfile"] + vcf = vcf_parser.Vcf(args["inputfile"]) # 1. Check if sample names match genotype IDs - sample_names = args['sample_names'] + sample_names = args["sample_names"] vcf_sample_names = vcf.header.IDs_genotypes sample_names_err = f"Sample names {sample_names} do not match sample identifires in the VCF {vcf_sample_names}" - + if len(sample_names) != len(vcf_sample_names): raise ValueError(sample_names_err) else: for id in vcf_sample_names: if id not in sample_names: raise ValueError(sample_names_err) - + with open(output_file, "w") as output: vcf.write_header(output) @@ -56,23 +55,30 @@ def main(args): # 3. Add 'chr' to CHROM if not present if vnt.CHROM.startswith(CHR_PREFIX) == False: vnt.CHROM = f"{CHR_PREFIX}{vnt.CHROM}" - + vcf.write_variant(output, vnt) - + ################################################ # Main ################################################ -if __name__ == '__main__': - - parser = argparse.ArgumentParser(description='Converts genomic coordinates between two different assemblies using pyliftover.') +if __name__ == "__main__": - parser.add_argument('-i','--inputfile', help='input VCF file', required=True) - parser.add_argument('-o','--outputfile', help='output VCF file', required=True) - parser.add_argument('-s', '--sample_names', help='list of sample IDs that must be present in the input VCF', nargs='+', required=True) + parser = argparse.ArgumentParser( + description="Converts genomic coordinates between two different assemblies using pyliftover." + ) + parser.add_argument("-i", "--inputfile", help="input VCF file", required=True) + parser.add_argument("-o", "--outputfile", help="output VCF file", required=True) + parser.add_argument( + "-s", + "--sample_names", + help="list of sample IDs that must be present in the input VCF", + nargs="+", + required=True, + ) args = vars(parser.parse_args()) - main(args) \ No newline at end of file + main(args) From 250ca96f75d2983bdde65343842c2ea9c77f0c6b Mon Sep 17 00:00:00 2001 From: Dominika Maziec Date: Mon, 15 Aug 2022 17:00:10 +0000 Subject: [PATCH 20/21] Liftover update --- cwl/gatk_liftover.cwl | 2 +- cwl/workflow_gatk_liftover.cwl | 2 +- .../scripts/preprocess_liftover.py | 4 +--- portal_objects/workflows/workflow_liftovervcf.json | 2 +- 4 files changed, 4 insertions(+), 6 deletions(-) diff --git a/cwl/gatk_liftover.cwl b/cwl/gatk_liftover.cwl index a942b17..f4b28c1 100644 --- a/cwl/gatk_liftover.cwl +++ b/cwl/gatk_liftover.cwl @@ -18,7 +18,7 @@ inputs: type: File inputBinding: prefix: -I - doc: expect the path to the input vcf + doc: expect a path to the input uncompressed or gzip-compressed vcf - id: reference_sequence type: File diff --git a/cwl/workflow_gatk_liftover.cwl b/cwl/workflow_gatk_liftover.cwl index de3fe69..48f4207 100644 --- a/cwl/workflow_gatk_liftover.cwl +++ b/cwl/workflow_gatk_liftover.cwl @@ -9,7 +9,7 @@ requirements: inputs: - id: input_vcf type: File - doc: expect the path to the vcf gz file + doc: expect a path to the input uncompressed or gzip-compressed vcf - id: chain type: File diff --git a/dockerfiles/granite_preprocess_liftover/scripts/preprocess_liftover.py b/dockerfiles/granite_preprocess_liftover/scripts/preprocess_liftover.py index 6e038e9..2c01e9b 100644 --- a/dockerfiles/granite_preprocess_liftover/scripts/preprocess_liftover.py +++ b/dockerfiles/granite_preprocess_liftover/scripts/preprocess_liftover.py @@ -65,9 +65,7 @@ def main(args): if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Converts genomic coordinates between two different assemblies using pyliftover." - ) + parser = argparse.ArgumentParser(description="Preprocess to the liftover step.") parser.add_argument("-i", "--inputfile", help="input VCF file", required=True) parser.add_argument("-o", "--outputfile", help="output VCF file", required=True) diff --git a/portal_objects/workflows/workflow_liftovervcf.json b/portal_objects/workflows/workflow_liftovervcf.json index cd260fe..7af48d7 100644 --- a/portal_objects/workflows/workflow_liftovervcf.json +++ b/portal_objects/workflows/workflow_liftovervcf.json @@ -77,7 +77,7 @@ "global": true, "type": "reference file" }, - "name": "reference_fasta", + "name": "reference", "source": [ { "name": "reference" From 84dd76e67b77a8cdbc4d08c3a6232eb099ba1af9 Mon Sep 17 00:00:00 2001 From: Dominika Maziec Date: Mon, 15 Aug 2022 17:51:22 +0000 Subject: [PATCH 21/21] liftover update --- cwl/preprocess_liftover.cwl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cwl/preprocess_liftover.cwl b/cwl/preprocess_liftover.cwl index 521712c..cf194f9 100644 --- a/cwl/preprocess_liftover.cwl +++ b/cwl/preprocess_liftover.cwl @@ -18,7 +18,7 @@ inputs: type: File inputBinding: prefix: -i - doc: expect the path to the input vcf + doc: expect a path to the input uncompressed or gzip-compressed vcf - id: sample_names type: string[]