From aae98ec2f7b71602777abc33d574c66f493e2ab3 Mon Sep 17 00:00:00 2001 From: "Bart Charbon (via Travis CI)" Date: Wed, 4 Dec 2024 08:39:36 +0000 Subject: [PATCH] Deploy molgenis/vip to github.com/molgenis/vip.git:gh-pages --- search/search_index.json | 2 +- sitemap.xml | 40 +++++++++++++++++++-------------------- sitemap.xml.gz | Bin 431 -> 431 bytes usage/input/index.html | 2 +- 4 files changed, 22 insertions(+), 22 deletions(-) diff --git a/search/search_index.json b/search/search_index.json index 7d5e04ab..665a2819 100644 --- a/search/search_index.json +++ b/search/search_index.json @@ -1 +1 @@ -{"config":{"indexing":"full","lang":["en"],"min_search_length":3,"prebuild_index":false,"separator":"[\\s\\-]+"},"docs":[{"location":"","text":"Variant Interpretation Pipeline (VIP) \u00b6 VIP is a flexible human variant interpretation pipeline for rare disease using state-of-the-art pathogenicity prediction ( CAPICE ) and template-based interactive reporting to facilitate decision support. The VIP pipeline can be used starting from either your fastq , bam/cram or .g.vcf/vcf data, every entry point will result in a vcf file with your annotated, classified and filtered variants as well as a interactive HTML report with the same variants, prioritized by the CAPICE pathogenicity score and providing additional aids like a genome browser and a representation of the decisions leading to the VIP classification. VIP can be used for single patients, families or cohort data. Try it yourself Visit https://vip.molgeniscloud.org/ to analyse your own variants Tip Preprint now available at medRxiv Click here for a live example ] Above: report example Above: report example: genome browser","title":"Introduction"},{"location":"#variant-interpretation-pipeline-vip","text":"VIP is a flexible human variant interpretation pipeline for rare disease using state-of-the-art pathogenicity prediction ( CAPICE ) and template-based interactive reporting to facilitate decision support. 
The VIP pipeline can be used starting from either your fastq , bam/cram or .g.vcf/vcf data; every entry point will result in a vcf file with your annotated, classified and filtered variants as well as an interactive HTML report with the same variants, prioritized by the CAPICE pathogenicity score and providing additional aids like a genome browser and a representation of the decisions leading to the VIP classification. VIP can be used for single patients, families or cohort data. Try it yourself Visit https://vip.molgeniscloud.org/ to analyse your own variants Tip Preprint now available at medRxiv Click here for a live example Above: report example Above: report example: genome browser","title":"Variant Interpretation Pipeline (VIP)"},{"location":"about/acknowledgements/","text":"Acknowledgements \u00b6 Standing on the shoulders of giants. This project could not have been possible without the existence of many other tools and resources. Among them we would like to thank the people behind the following projects: CAPICE Ensembl Variant Effect Predictor (VEP) Nextflow AlphScore AnnotSV Illumina ExpansionHunter Illumina Manta Illumina SpliceAI igv.js DeepVariant Minimap2 GLnexus Samtools formats and tools Human Phenotype Ontology Consortium Clinical Genomic Database gnomAD ClinVar VKGL phyloP cuteSV Mosdepth Spectre Straglr Stranger fastp","title":"Acknowledgements"},{"location":"about/acknowledgements/#acknowledgements","text":"Standing on the shoulders of giants. This project could not have been possible without the existence of many other tools and resources. 
Among them we would like to thank the people behind the following projects: CAPICE Ensembl Variant Effect Predictor (VEP) Nextflow AlphScore AnnotSV Illumina ExpansionHunter Illumina Manta Illumina SpliceAI igv.js DeepVariant Minimap2 GLnexus Samtools formats and tools Human Phenotype Ontology Consortium Clinical Genomic Database gnomAD ClinVar VKGL phyloP cuteSV Mosdepth Spectre Straglr Stranger fastp","title":"Acknowledgements"},{"location":"about/license/","text":"License \u00b6 VIP is open source and available under the GNU Lesser General Public License v3.0 from https://github.com/molgenis/vip . See https://github.com/molgenis/vip/blob/main/LICENSE for details. GREEN-DB \u00b6 GREEN-DB is free to use for academic users, please refer to the attached LICENSE file here . If you are interested in using the Database commercially, please contact Oxford University Innovation Limited to negotiate a licence. Contact details are enquiries@innovation.ox.ac.uk quoting reference 18096. Relationship to other licences \u00b6 VIP is an aggregate work of many works, each covered by their own licence(s). For the purposes of determining what you can do with specific works in VIP, this policy should be read together with the licence(s) of the relevant tools. For the avoidance of doubt, where any other licence grants rights, this policy does not modify or reduce those rights under those licences.","title":"License"},{"location":"about/license/#license","text":"VIP is open source and available under the GNU Lesser General Public License v3.0 from https://github.com/molgenis/vip . See https://github.com/molgenis/vip/blob/main/LICENSE for details.","title":"License"},{"location":"about/license/#green-db","text":"GREEN-DB is free to use for academic users, please refer to the attached LICENSE file here . If you are interested in using the Database commercially, please contact Oxford University Innovation Limited to negotiate a licence. 
Contact details are enquiries@innovation.ox.ac.uk quoting reference 18096.","title":"GREEN-DB"},{"location":"about/license/#relationship-to-other-licences","text":"VIP is an aggregate work of many works, each covered by their own licence(s). For the purposes of determining what you can do with specific works in VIP, this policy should be read together with the licence(s) of the relevant tools. For the avoidance of doubt, where any other licence grants rights, this policy does not modify or reduce those rights under those licences.","title":"Relationship to other licences"},{"location":"advanced/annotations/","text":"Annotations \u00b6 VIP annotates variant effects and genotype data for samples using a rich set of tools. Annotations can be used to classify variants using classification trees and displayed in reports. Overview \u00b6 The table contains annotations available in most output files. Depending on the workflow and the configuration used, additional annotations might be available; check the output file headers for the complete overview. Similarly, some annotations listed below might be missing from your output file depending on the sample sheet content and configuration. 
annotation type source description FORMAT/VI string list vip-inheritance-matcher An enumeration of possible inheritance modes (Possible values: AR, AR_C, AD, AD_IP, XLR, XLD) FORMAT/VIC string vip-inheritance-matcher Possible Compound hetrozygote variants FORMAT/VID integer vip-inheritance-matcher De novo variant FORMAT/VIG string list vip-inheritance-matcher Genes with an inheritance match FORMAT/VIM integer vip-inheritance-matcher Inheritance Match: Genotypes, affected statuses and known gene inheritance patterns match FORMAT/VIPC_S string list vip-decision-tree VIP decision tree classification for sample FORMAT/VIPP_S string list vip-decision-tree VIP decision tree path for sample INFO/CSQ/Allele string VEP The variant allele used to calculate the consequence INFO/CSQ/ALLELE_NUM integer VEP Allele nr within the VCF file. INFO/CSQ/ALPHSCORE float VEP plugin AlphScore pathogenicity score for missense variants (see here ) INFO/CSQ/Amino_acids string VEP Reference and variant amino acids INFO/CSQ/ASV_ACMG_class string VEP plugin AnnotSv 'ACMG_class' output INFO/CSQ/ASV_AnnotSV_ranking_criteria string VEP plugin AnnotSv 'AnnotSV_ranking_criteria' output INFO/CSQ/ASV_AnnotSV_ranking_score string VEP plugin AnnotSv 'AnnotSV_ranking_score' output INFO/CSQ/BIOTYPE string VEP Biotype of transcript or regulatory feature INFO/CSQ/CAPICE_CL categorical VEP plugin CAPICE classification (see here ). Categories: B, LB, VUS, LP, P INFO/CSQ/CAPICE_SC float VEP plugin CAPICE score INFO/CSQ/cDNA_position string VEP Position within the cDNA INFO/CSQ/CDS_position string VEP Position within the coding sequence INFO/CSQ/CHECK_REF string VEP Reports variants where the input reference does not match the expected reference INFO/CSQ/CLIN_SIG string list VEP ClinVar classification(s) (do not use, see here ) INFO/CSQ/clinVar_CLNID integer list VEP plugin ClinVar variation identifier INFO/CSQ/clinVar_CLNREVSTAT categorical list VEP plugin ClinVar review status for the Variation ID. 
Categories: practice_guideline, reviewed_by_expert_panel, criteria_provided, _multiple_submitters, _no_conflicts, _single_submitter, _conflicting_interpretations, no_assertion_criteria_provided, no_assertion_provided INFO/CSQ/clinVar_CLNSIG string VEP plugin Clinical significance for this single variant; multiple values are separated by a vertical bar. Categories: Benign, Likely_benign, Uncertain_significance, Likely_pathogenic, Pathogenic, Conflicting_classifications_of_pathogenicity, Other INFO/CSQ/clinVar_CLNSIGINCL string VEP plugin Clinical significance for a haplotype or genotype that includes this variant. Reported as pairs of VariationID:clinical significance; multiple values are separated by a vertical bar. Categories: Benign, Likely_benign, Uncertain_significance, Likely_pathogenic, Pathogenic, Conflicting_interpretations_of_pathogenicity INFO/CSQ/Codons string VEP Reference and variant codon sequence INFO/CSQ/Consequence string list VEP Effect(s) described as Sequence Ontology term(s) INFO/CSQ/DISTANCE string VEP Shortest distance from variant to transcript INFO/CSQ/existing_InFrame_oORFs string VEP plugin The number of existing inFrame overlapping ORFs (inFrame oORF) at the 5 prime UTR INFO/CSQ/existing_OutOfFrame_oORFs string VEP plugin The number of existing out-of-frame overlapping ORFs (OutOfFrame oORF) at the 5 prime UTR INFO/CSQ/existing_uORFs string VEP plugin The number of existing uORFs with a stop codon within the 5 prime UTR INFO/CSQ/Existing_variation string list VEP Identifier(s) of co-located known variants INFO/CSQ/EXON string VEP The exon number (out of total number) INFO/CSQ/Feature string VEP Ensembl stable ID of feature INFO/CSQ/Feature_type categorical VEP VEP feature type. 
Categories: Transcript, RegulatoryFeature, MotifFeature INFO/CSQ/FATHMM_MKL_NC float VEP plugin The FATHMM-MKL score for Non-Coding Single Nucleotide Variants (SNVs) INFO/CSQ/five_prime_UTR_variant_annotation string VEP plugin Output the annotation of a given 5 prime UTR variant INFO/CSQ/five_prime_UTR_variant_consequence string VEP plugin Output the variant consequences of a given 5 prime UTR variant: uAUG_gained, uAUG_lost, uSTOP_lost or uFrameshift INFO/CSQ/FLAGS string list VEP Transcript quality flags (cds_start_NF: CDS 5' incomplete, cds_end_NF: CDS 3' incomplete) INFO/CSQ/GADO_PD categorical VEP plugin GADO prediction for the relation between the HPO terms of the proband(s) and the gene, HC: high confidence, LC: low confidence. Categories: LC, HC INFO/CSQ/GADO_SC float VEP plugin The combined prioritization GADO Z-score over the HPO of the proband(s) terms for this case INFO/CSQ/Gene string VEP Ensembl stable ID of affected gene INFO/CSQ/gnomAD_COV float VEP plugin gnomAD coverage (percent of individuals in gnomAD source) INFO/CSQ/gnomAD_AF float VEP plugin gnomAD allele frequency INFO/CSQ/gnomAD_FAF95 float VEP plugin gnomAD filter allele frequency (95% confidence) INFO/CSQ/gnomAD_FAF99 float VEP plugin gnomAD filter allele frequency (99% confidence) INFO/CSQ/gnomAD_HN integer VEP plugin gnomAD number of homozygotes INFO/CSQ/gnomAD_QC string list VEP plugin gnomAD quality control filters that failed INFO/CSQ/gnomAD_SRC categorical VEP plugin gnomAD source (E=exomes, G=genomes, T=total) INFO/CSQ/Grantham string VEP plugin Grantham Matrix score - Grantham, R. 
Amino Acid Difference Formula to Help Explain Protein Evolution, Science 1974 Sep 6;185(4154):862-4 INFO/CSQ/HGNC_ID integer VEP HGNC gene identifier INFO/CSQ/HGVS_OFFSET string VEP Indicates by how many bases the HGVS notations for this variant have been shifted INFO/CSQ/HGVSc string VEP HGVS nomenclature: coding DNA reference sequence INFO/CSQ/HGVSp string VEP HGVS nomenclature: protein reference sequence INFO/CSQ/HIGH_INF_POS string VEP A flag indicating if the variant falls in a high information position of a transcription factor binding profile (TFBP) INFO/CSQ/HPO string list VEP plugin Human phenotype ontology term that match INFO/CSQ/IMPACT categorical VEP Impact as predicted by VEP. Categories: LOW, MODERATE, HIGH, MODIFIER INFO/CSQ/IncompletePenetrance string VEP plugin Boolean indicating if the gene is known for incomplete penetrance (1:true) INFO/CSQ/InheritanceModesGene string list VEP plugin List of inheritance modes for the gene INFO/CSQ/INTRON string VEP The intron number (out of total number) INFO/CSQ/MOTIF_NAME string VEP The source and identifier of a transcription factor binding profile aligned at this position INFO/CSQ/MOTIF_POS string VEP The relative position of the variation in the aligned TFBP INFO/CSQ/MOTIF_SCORE_CHANGE string VEP The difference in motif score of the reference and variant sequences for the TFBP INFO/CSQ/ncER float VEP plugin The non-coding essential regulation (ncER) score indicates if a region is likely to be essential in terms of regulation. 
INFO/CSQ/PHENO integer list VEP Indicates if existing variant is associated with a phenotype, disease or trait; multiple values correspond to multiple values in the Existing_variation field INFO/CSQ/phyloP string VEP custom Conservation p-values, see here INFO/CSQ/PICK integer VEP Boolean indicating if this is the VEP picked transcript INFO/CSQ/PolyPhen float VEP PolyPhen score INFO/CSQ/Protein_position string VEP Position within the protein INFO/CSQ/PUBMED integer list VEP PubMed citations INFO/CSQ/REFSEQ_MATCH string VEP Flag indicating whether and how the RefSeq model differs from the underlying genome INFO/CSQ/REFSEQ_OFFSET string VEP ? INFO/CSQ/ReMM float VEP plugin The Regulatory Mendelian Mutation (ReMM) score was created for relevance prediction of non-coding variations in the human genome in terms of Mendelian diseases. INFO/CSQ/SIFT float VEP SIFT score INFO/CSQ/SOMATIC integer list VEP Somatic status of existing variant(s); multiple values correspond to multiple values in the Existing_variation field INFO/CSQ/SOURCE string VEP ? INFO/CSQ/SpliceAI_pred_DP_AG float VEP plugin SpliceAI predicted effect on splicing. Delta position for acceptor gain INFO/CSQ/SpliceAI_pred_DP_AL float VEP plugin SpliceAI predicted effect on splicing. Delta position for acceptor loss INFO/CSQ/SpliceAI_pred_DP_DG float VEP plugin SpliceAI predicted effect on splicing. Delta position for donor gain INFO/CSQ/SpliceAI_pred_DP_DL float VEP plugin SpliceAI predicted effect on splicing. Delta position for donor loss INFO/CSQ/SpliceAI_pred_DS_AG float VEP plugin SpliceAI predicted effect on splicing. Delta score for acceptor gain INFO/CSQ/SpliceAI_pred_DS_AL float VEP plugin SpliceAI predicted effect on splicing. Delta score for acceptor loss INFO/CSQ/SpliceAI_pred_DS_DG float VEP plugin SpliceAI predicted effect on splicing. Delta score for donor gain INFO/CSQ/SpliceAI_pred_DS_DL float VEP plugin SpliceAI predicted effect on splicing. 
Delta score for donor loss INFO/CSQ/SpliceAI_pred_SYMBOL string VEP plugin SpliceAI gene symbol INFO/CSQ/STRAND string VEP The DNA strand (1 or -1) on which the transcript/feature lies INFO/CSQ/SYMBOL string VEP Gene symbol INFO/CSQ/SYMBOL_SOURCE string VEP The source of the gene symbol INFO/CSQ/TRANSCRIPTION_FACTORS string VEP ? INFO/CSQ/VIPC string vip-decision-tree VIP decision tree classification for variant effect INFO/CSQ/VIPP string list vip-decision-tree VIP decision tree path for variant effect INFO/CSQ/VKGL string VEP plugin ? INFO/CSQ/VKGL_CL string VEP plugin VKGL consensus variant classification Details \u00b6 VIP uses the Ensemble Effect Predictor to annotate all variants with their consequences. We use VEP with the refseq option for the transcripts, and with the flags for sift and polyphen annotations enabled. Plugins \u00b6 Below we describe the other sources which we annotate using the VEP plugin framework. CAPICE \u00b6 CAPICE is a computational method for predicting the pathogenicity of SNVs and InDels. It is a gradient boosting tree model trained using a variety of genomic annotations used by CADD score and trained on the clinical significance. CAPICE performs consistently across diverse independent synthetic, and real clinical data sets. It ourperforms the current best method in pathogenicity estimation for variants of different molecular consequences and allele frequency. We run the CAPICE application in the VIP pipeline and use a VEP plugin to annotate the VEP output with the scores from the CAPICE output file. VKGL \u00b6 The datashare workgroup of VKGL has set up a central database to enable mutual sharing of variant classifications through a partly automatic process. An additional goal is the public sharing of these data. The currently publicly available part of the database consists of DNA variant classifications established based on (former) diagnostic questions. 
We add the classifications from an export of the database and use a VEP plugin to annotate the VEP output with the classifications from this file. SpliceAI \u00b6 SpliceAI is an open-source deep learning splicing prediction algorithm that has demonstrated in the past few years its high ability to predict splicing defects caused by DNA variations. We add the scores from the available precomputed scores of SpliceAI and use a copy of the available VEP plugin to annotate the VEP output with the classifications from this file. AnnotSV \u00b6 AnnotSV is a program for annotating and ranking structural variations from genomes of several organisms. We run the AnnotSV application in the VIP pipeline and use a VEP plugin to annotate the VEP output with the scores from the AnnotSV output file. HPO \u00b6 A file based on the HPO phenotype_to_genes.txt is used to annotate VEP consequences with the inheritance modes associated with the gene of this consequence. Inheritance \u00b6 A file based on the CGD database is used to annotate VEP consequences with the inheritance modes associated with the gene of this consequence. Grantham \u00b6 The Grantham score attempts to predict the distance between two amino acids, in an evolutionary sense. A lower Grantham score reflects less evolutionary distance. A higher Grantham score reflects a greater evolutionary distance. We use a copy of the VEP plugin by Duarte Molha to annotate the VEP output with Grantham scores. GADO \u00b6 GADO can be used to prioritize genes based on the HPO terms of a patient. We run the GADO command-line application in the VIP pipeline and use a VEP plugin to annotate the VEP output with the scores from the GADO output file. AlphScore \u00b6 AlphScore is a method to predict the pathogenicity of missense variants using features derived from AlphaFold2. We add the available precomputed scores of AlphScore using a custom VEP plugin. 
ncER \u00b6 The non-coding essential regulation (ncER) score indicates if a region is likely to be essential in terms of regulation. The ncER file VIP uses is the version provided by GREEN-VARAN (https://github.com/edg1983/GREEN-VARAN) on Zenodo: https://zenodo.org/records/5636163 ReMM \u00b6 The Regulatory Mendelian Mutation (ReMM) score was created for relevance prediction of non-coding variations (SNVs and small InDels) in the human genome (hg19) in terms of Mendelian diseases. The VEP plugin is built on top of the GREEN-DB dataset (GRCh38) for ReMM scores: https://zenodo.org/records/3955933 FATHMM-MKL \u00b6 FATHMM-MKL predicts the Functional Consequences of Coding and Non-Coding Single Nucleotide Variants (SNVs). This plugin annotates non-coding scores only, and is built on top of the GREEN-DB dataset (GRCh38) for FATHMM-MKL non-coding scores: https://zenodo.org/records/3981121 GREEN-DB constraint scores \u00b6 GREEN-DB GREEN-DB is a comprehensive collection of 2.4 million regulatory elements in the human genome collected from previously published databases, high-throughput screenings and functional studies. This plugin annotates the constraint scores only, and is built on top of the GREEN-DB bed files (GRCh38): https://zenodo.org/records/5636209 GREEN-DB constraint scores are annotated per region type: enhancers, promoters, bivalent, insulators, silencers. If multiple regions of the same type overlap, VIP annotates the highest constraint score.","title":"Annotations"},{"location":"advanced/annotations/#annotations","text":"VIP annotates variant effects and genotype data for samples using a rich set of tools. Annotations can be used to classify variants using classification trees and displayed in reports.","title":"Annotations"},{"location":"advanced/annotations/#overview","text":"The table contains annotations available in most output files. 
Depending on the workflow and the configuration used additional annotations might be available, check the output file headers for the complete overview. Similarly, some annotations listed below might be missing from your output file depending on the sample sheet content and configuration. annotation type source description FORMAT/VI string list vip-inheritance-matcher An enumeration of possible inheritance modes (Possible values: AR, AR_C, AD, AD_IP, XLR, XLD) FORMAT/VIC string vip-inheritance-matcher Possible Compound hetrozygote variants FORMAT/VID integer vip-inheritance-matcher De novo variant FORMAT/VIG string list vip-inheritance-matcher Genes with an inheritance match FORMAT/VIM integer vip-inheritance-matcher Inheritance Match: Genotypes, affected statuses and known gene inheritance patterns match FORMAT/VIPC_S string list vip-decision-tree VIP decision tree classification for sample FORMAT/VIPP_S string list vip-decision-tree VIP decision tree path for sample INFO/CSQ/Allele string VEP The variant allele used to calculate the consequence INFO/CSQ/ALLELE_NUM integer VEP Allele nr within the VCF file. INFO/CSQ/ALPHSCORE float VEP plugin AlphScore pathogenicity score for missense variants (see here ) INFO/CSQ/Amino_acids string VEP Reference and variant amino acids INFO/CSQ/ASV_ACMG_class string VEP plugin AnnotSv 'ACMG_class' output INFO/CSQ/ASV_AnnotSV_ranking_criteria string VEP plugin AnnotSv 'AnnotSV_ranking_criteria' output INFO/CSQ/ASV_AnnotSV_ranking_score string VEP plugin AnnotSv 'AnnotSV_ranking_score' output INFO/CSQ/BIOTYPE string VEP Biotype of transcript or regulatory feature INFO/CSQ/CAPICE_CL categorical VEP plugin CAPICE classification (see here ). 
Categories: B, LB, VUS, LP, P INFO/CSQ/CAPICE_SC float VEP plugin CAPICE score INFO/CSQ/cDNA_position string VEP Position within the cDNA INFO/CSQ/CDS_position string VEP Position within the coding sequence INFO/CSQ/CHECK_REF string VEP Reports variants where the input reference does not match the expected reference INFO/CSQ/CLIN_SIG string list VEP ClinVar classification(s) (do not use, see here ) INFO/CSQ/clinVar_CLNID integer list VEP plugin ClinVar variation identifier INFO/CSQ/clinVar_CLNREVSTAT categorical list VEP plugin ClinVar review status for the Variation ID. Categories: practice_guideline, reviewed_by_expert_panel, criteria_provided, _multiple_submitters, _no_conflicts, _single_submitter, _conflicting_interpretations, no_assertion_criteria_provided, no_assertion_provided INFO/CSQ/clinVar_CLNSIG string VEP plugin Clinical significance for this single variant; multiple values are separated by a vertical bar. Categories: Benign, Likely_benign, Uncertain_significance, Likely_pathogenic, Pathogenic, Conflicting_classifications_of_pathogenicity, Other INFO/CSQ/clinVar_CLNSIGINCL string VEP plugin Clinical significance for a haplotype or genotype that includes this variant. Reported as pairs of VariationID:clinical significance; multiple values are separated by a vertical bar. 
Categories: Benign, Likely_benign, Uncertain_significance, Likely_pathogenic, Pathogenic, Conflicting_interpretations_of_pathogenicity INFO/CSQ/Codons string VEP Reference and variant codon sequence INFO/CSQ/Consequence string list VEP Effect(s) described as Sequence Ontology term(s) INFO/CSQ/DISTANCE string VEP Shortest distance from variant to transcript INFO/CSQ/existing_InFrame_oORFs string VEP plugin The number of existing inFrame overlapping ORFs (inFrame oORF) at the 5 prime UTR INFO/CSQ/existing_OutOfFrame_oORFs string VEP plugin The number of existing out-of-frame overlapping ORFs (OutOfFrame oORF) at the 5 prime UTR INFO/CSQ/existing_uORFs string VEP plugin The number of existing uORFs with a stop codon within the 5 prime UTR INFO/CSQ/Existing_variation string list VEP Identifier(s) of co-located known variants INFO/CSQ/EXON string VEP The exon number (out of total number) INFO/CSQ/Feature string VEP Ensembl stable ID of feature INFO/CSQ/Feature_type categorical VEP VEP feature type. Categories: Transcript, RegulatoryFeature, MotifFeature INFO/CSQ/FATHMM_MKL_NC float VEP plugin The FATHMM-MKL score for Non-Coding Single Nucleotide Variants (SNVs) INFO/CSQ/five_prime_UTR_variant_annotation string VEP plugin Output the annotation of a given 5 prime UTR variant INFO/CSQ/five_prime_UTR_variant_consequence string VEP plugin Output the variant consequences of a given 5 prime UTR variant: uAUG_gained, uAUG_lost, uSTOP_lost or uFrameshift INFO/CSQ/FLAGS string list VEP Transcript quality flags (cds_start_NF: CDS 5' incomplete, cds_end_NF: CDS 3' incomplete) INFO/CSQ/GADO_PD categorical VEP plugin GADO prediction for the relation between the HPO terms of the proband(s) and the gene, HC: high confidence, LC: low confidence. 
Categories: LC, HC INFO/CSQ/GADO_SC float VEP plugin The combined prioritization GADO Z-score over the HPO of the proband(s) terms for this case INFO/CSQ/Gene string VEP Ensembl stable ID of affected gene INFO/CSQ/gnomAD_COV float VEP plugin gnomAD coverage (percent of individuals in gnomAD source) INFO/CSQ/gnomAD_AF float VEP plugin gnomAD allele frequency INFO/CSQ/gnomAD_FAF95 float VEP plugin gnomAD filter allele frequency (95% confidence) INFO/CSQ/gnomAD_FAF99 float VEP plugin gnomAD filter allele frequency (99% confidence) INFO/CSQ/gnomAD_HN integer VEP plugin gnomAD number of homozygotes INFO/CSQ/gnomAD_QC string list VEP plugin gnomAD quality control filters that failed INFO/CSQ/gnomAD_SRC categorical VEP plugin gnomAD source (E=exomes, G=genomes, T=total) INFO/CSQ/Grantham string VEP plugin Grantham Matrix score - Grantham, R. Amino Acid Difference Formula to Help Explain Protein Evolution, Science 1974 Sep 6;185(4154):862-4 INFO/CSQ/HGNC_ID integer VEP HGNC gene identifier INFO/CSQ/HGVS_OFFSET string VEP Indicates by how many bases the HGVS notations for this variant have been shifted INFO/CSQ/HGVSc string VEP HGVS nomenclature: coding DNA reference sequence INFO/CSQ/HGVSp string VEP HGVS nomenclature: protein reference sequence INFO/CSQ/HIGH_INF_POS string VEP A flag indicating if the variant falls in a high information position of a transcription factor binding profile (TFBP) INFO/CSQ/HPO string list VEP plugin Human phenotype ontology term that match INFO/CSQ/IMPACT categorical VEP Impact as predicted by VEP. 
Categories: LOW, MODERATE, HIGH, MODIFIER INFO/CSQ/IncompletePenetrance string VEP plugin Boolean indicating if the gene is known for incomplete penetrance (1:true) INFO/CSQ/InheritanceModesGene string list VEP plugin List of inheritance modes for the gene INFO/CSQ/INTRON string VEP The intron number (out of total number) INFO/CSQ/MOTIF_NAME string VEP The source and identifier of a transcription factor binding profile aligned at this position INFO/CSQ/MOTIF_POS string VEP The relative position of the variation in the aligned TFBP INFO/CSQ/MOTIF_SCORE_CHANGE string VEP The difference in motif score of the reference and variant sequences for the TFBP INFO/CSQ/ncER float VEP plugin The non-coding essential regulation (ncER) score indicates if a region is likely to be essential in terms of regulation. INFO/CSQ/PHENO integer list VEP Indicates if existing variant is associated with a phenotype, disease or trait; multiple values correspond to multiple values in the Existing_variation field INFO/CSQ/phyloP string VEP custom Conservation p-values, see here INFO/CSQ/PICK integer VEP Boolean indicating if this is the VEP picked transcript INFO/CSQ/PolyPhen float VEP PolyPhen score INFO/CSQ/Protein_position string VEP Position within the protein INFO/CSQ/PUBMED integer list VEP PubMed citations INFO/CSQ/REFSEQ_MATCH string VEP Flag indicating whether and how the RefSeq model differs from the underlying genome INFO/CSQ/REFSEQ_OFFSET string VEP ? INFO/CSQ/ReMM float VEP plugin The Regulatory Mendelian Mutation (ReMM) score was created for relevance prediction of non-coding variations in the human genome in terms of Mendelian diseases. INFO/CSQ/SIFT float VEP SIFT score INFO/CSQ/SOMATIC integer list VEP Somatic status of existing variant(s); multiple values correspond to multiple values in the Existing_variation field INFO/CSQ/SOURCE string VEP ? INFO/CSQ/SpliceAI_pred_DP_AG float VEP plugin SpliceAI predicted effect on splicing. 
Delta position for acceptor gain INFO/CSQ/SpliceAI_pred_DP_AL float VEP plugin SpliceAI predicted effect on splicing. Delta position for acceptor loss INFO/CSQ/SpliceAI_pred_DP_DG float VEP plugin SpliceAI predicted effect on splicing. Delta position for donor gain INFO/CSQ/SpliceAI_pred_DP_DL float VEP plugin SpliceAI predicted effect on splicing. Delta position for donor loss INFO/CSQ/SpliceAI_pred_DS_AG float VEP plugin SpliceAI predicted effect on splicing. Delta score for acceptor gain INFO/CSQ/SpliceAI_pred_DS_AL float VEP plugin SpliceAI predicted effect on splicing. Delta score for acceptor loss INFO/CSQ/SpliceAI_pred_DS_DG float VEP plugin SpliceAI predicted effect on splicing. Delta score for donor gain INFO/CSQ/SpliceAI_pred_DS_DL float VEP plugin SpliceAI predicted effect on splicing. Delta score for donor loss INFO/CSQ/SpliceAI_pred_SYMBOL string VEP plugin SpliceAI gene symbol INFO/CSQ/STRAND string VEP The DNA strand (1 or -1) on which the transcript/feature lies INFO/CSQ/SYMBOL string VEP Gene symbol INFO/CSQ/SYMBOL_SOURCE string VEP The source of the gene symbol INFO/CSQ/TRANSCRIPTION_FACTORS string VEP ? INFO/CSQ/VIPC string vip-decision-tree VIP decision tree classification for variant effect INFO/CSQ/VIPP string list vip-decision-tree VIP decision tree path for variant effect INFO/CSQ/VKGL string VEP plugin ? INFO/CSQ/VKGL_CL string VEP plugin VKGL consensus variant classification","title":"Overview"},{"location":"advanced/annotations/#details","text":"VIP uses the Ensemble Effect Predictor to annotate all variants with their consequences. 
We use VEP with the refseq option for the transcripts, and with the flags for sift and polyphen annotations enabled.","title":"Details"},{"location":"advanced/annotations/#plugins","text":"Below we describe the other sources which we annotate using the VEP plugin framework.","title":"Plugins"},{"location":"advanced/annotations/#capice","text":"CAPICE is a computational method for predicting the pathogenicity of SNVs and InDels. It is a gradient boosting tree model trained using a variety of genomic annotations used by CADD score and trained on the clinical significance. CAPICE performs consistently across diverse independent synthetic and real clinical data sets. It outperforms the current best method in pathogenicity estimation for variants of different molecular consequences and allele frequency. We run the CAPICE application in the VIP pipeline and use a VEP plugin to annotate the VEP output with the scores from the CAPICE output file.","title":"CAPICE"},{"location":"advanced/annotations/#vkgl","text":"The datashare workgroup of VKGL has set up a central database to enable mutual sharing of variant classifications through a partly automatic process. An additional goal is the public sharing of these data. The currently publicly available part of the database consists of DNA variant classifications established based on (former) diagnostic questions. We add the classifications from an export of the database and use a VEP plugin to annotate the VEP output with the classifications from this file.","title":"VKGL"},{"location":"advanced/annotations/#spliceai","text":"SpliceAI is an open-source deep learning splicing prediction algorithm that has demonstrated in the past few years its high ability to predict splicing defects caused by DNA variations. 
We add the scores from the available precomputed scores of SpliceAI and use a copy of the available VEP plugin to annotate the VEP output with the classifications from the this file.","title":"SpliceAI"},{"location":"advanced/annotations/#annotsv","text":"AnnotSV is a program for annotating and ranking structural variations from genomes of several organisms. We run the AnnotSV application in the VIP pipeline and use a VEP plugin to annotate the VEP output with the scores from the AnnotSV output file.","title":"AnnotSV"},{"location":"advanced/annotations/#hpo","text":"A file based on the HPO phenotype_to_genes.txt is used to annotate VEP consequences with the inheritance modes associated with the gene of this consequence.","title":"HPO"},{"location":"advanced/annotations/#inheritance","text":"A file based on the CGD database is used to annotate VEP consequences with the inheritance modes associated with the gene of this consequence.","title":"Inheritance"},{"location":"advanced/annotations/#grantham","text":"The Grantham score attempts to predict the distance between two amino acids, in an evolutionary sense. A lower Grantham score reflects less evolutionary distance. A higher Grantham score reflects a greater evolutionary distance. We use a copy of the VEP plugin by Duarte Molha to annotate the VEP output with Grantham scores.","title":"Grantham"},{"location":"advanced/annotations/#gado","text":"GADO can be used to prioritize genes based on the HPO terms of a patient.. We run the GADO commandline application in the VIP pipeline and use a VEP plugin to annotate the VEP output with the scores from the GADO output file.","title":"GADO"},{"location":"advanced/annotations/#alphscore","text":"AlphScore is a method to predict the pathogenicity of missense variants using features derived from AlphaFold2. 
We add the available precomputed scores of AlphScore using a custom VEP plugin.","title":"AlphScore"},{"location":"advanced/annotations/#ncer","text":"The non-coding essential regulation (ncER) score indicates if a region is likely to be essential in terms of regulation. The ncER file VIP uses is the version provided by GREEN-VARAN (https://github.com/edg1983/GREEN-VARAN) on Zenodo: https://zenodo.org/records/5636163","title":"ncER"},{"location":"advanced/annotations/#remm","text":"The Regulatory Mendelian Mutation (ReMM) score was created for relevance prediction of non-coding variations (SNVs and small InDels) in the human genome (hg19) in terms of Mendelian diseases. The VEP plugin is built on top of the GREEN-DB dataset (GRCh38) for ReMM scores: https://zenodo.org/records/3955933","title":"ReMM"},{"location":"advanced/annotations/#fathmm-mkl","text":"FATHMM-MKL predicts the Functional Consequences of Coding and Non-Coding Single Nucleotide Variants (SNVs). This plugin annotates non-coding scores only, and is built on top of the GREEN-DB dataset (GRCh38) for FATHMM-MKL non-coding scores: https://zenodo.org/records/3981121","title":"FATHMM-MKL"},{"location":"advanced/annotations/#green-db-constraint-scores","text":"GREEN-DB GREEN-DB is a comprehensive collection of 2.4 million regulatory elements in the human genome collected from previously published databases, high-throughput screenings and functional studies. This plugin annotates the constraint scores only, and is built on top of the GREEN-DB bed files (GRCh38): https://zenodo.org/records/5636209 GREEN-DB constraint scores are annotated per region type: enhancers, promoters, bivalent, insulators, silencers. 
If multiple regions of the same type overlap, VIP annotates the highest constraint score.","title":"GREEN-DB constraint scores"},{"location":"advanced/classification_trees/","text":"Classification trees \u00b6 In order to end up with a small list of candidate variant records for interpretation VIP performs variant filtration by: Classify all variant-consequences based on variant annotations Remove variant-consequences based on their classes Annotate remaining variant records using inheritance matcher Classify all variant-consequences based on variant annotations in the context of samples Remove variant-consequences based on their classes. Remove variants that had all their variant-consequences removed The following sections describe the default variant filtration strategies and how to customize classification and filtration. Default \u00b6 VIP contains default filtration strategies for variant-consequences as well as variant-consequences in the context of samples. Variant-consequences \u00b6 The default decision tree to classify variant-consequences works as follows: Each variant-consequence is classified as Benign , Likely Benign , VUS , Likely Pathogenic , Pathogenic or Remove Variant-consequences classified as Benign , Likely Benign and Remove are removed by default. Above: default GRCh38 variant classification tree Variant-consequences (samples) \u00b6 The default decision tree to classify variant-consequences in the context of samples works as follows: Each variant-consequence-sample is classified as U1 (usable: probably), U2 (usable: maybe), U3 (usable: probably not) and U4 (usable: only in cases of suspected incomplete penetrance). Variant-consequences classified as U3 and U4 for all samples are removed by default. 
Above: default variant sample classification tree Customization \u00b6 The default variant filtration strategy can be customized using the following parameters (see here ): vcf.classify.GRCh38.decision_tree vcf.filter.classes vcf.classify_samples.GRCh38.decision_tree vcf.filter_samples.classes The following repositories might be of interest when creating a new decision tree: vip vip-decision-tree You are free to use your own set of classes in your decision tree. Keep in mind to update the filter classes parameters accordingly.","title":"Classification trees"},{"location":"advanced/classification_trees/#classification-trees","text":"In order to end up with a small list of candidate variant records for interpretation VIP performs variant filtration by: Classify all variant-consequences based on variant annotations Remove variant-consequences based on their classes Annotate remaining variant records using inheritance matcher Classify all variant-consequences based on variant annotations in the context of samples Remove variant-consequences based on their classes. Remove variants that had all their variant-consequences removed The following sections describe the default variant filtration strategies and how to customize classification and filtration.","title":"Classification trees"},{"location":"advanced/classification_trees/#default","text":"VIP contains default filtration strategies for variant-consequences as well as variant-consequences in the context of samples.","title":"Default"},{"location":"advanced/classification_trees/#variant-consequences","text":"The default decision tree to classify variant-consequences works as follows: Each variant-consequence is classified as Benign , Likely Benign , VUS , Likely Pathogenic , Pathogenic or Remove Variant-consequences classified as Benign , Likely Benign and Remove are removed by default. 
Above: default GRCh38 variant classification tree","title":"Variant-consequences"},{"location":"advanced/classification_trees/#variant-consequences-samples","text":"The default decision tree to classify variant-consequences in the context of samples works as follows: Each variant-consequence-sample is classified as U1 (usable: probably), U2 (usable: maybe), U3 (usable: probably not) and U4 (usable: only in cases of suspected incomplete penetrance). Variant-consequences classified as U3 and U4 for all samples are removed by default. Above: default variant sample classification tree","title":"Variant-consequences (samples)"},{"location":"advanced/classification_trees/#customization","text":"The default variant filtration strategy can be customized using the following parameters (see here ): vcf.classify.GRCh38.decision_tree vcf.filter.classes vcf.classify_samples.GRCh38.decision_tree vcf.filter_samples.classes The following repositories might be of interest when creating a new decision tree: vip vip-decision-tree You are free to use your own set of classes in your decision tree. Keep in mind to update the filter classes parameters accordingly.","title":"Customization"},{"location":"advanced/report_templates/","text":"Report templates \u00b6 VIP outputs a standalone HTML report that can be viewed in any modern browser. The report is based on the input sample sheet information and the output variant vcf data. Default \u00b6 As a default VIP uses a report template that is suitable for most analysis: Above: default report template Customization \u00b6 Using the vcf.report.template parameter (see here ) it is possible to specify a different report template to create reports tailered to your needs. 
The following repositories might be of interest when creating a new report template: vip-report-api vip-report-template vip-report-vcf vite-plugin-inline The vip-report tool creates reports based on a report template as described in the following repositories: vip-report vip-utils Configuration \u00b6 A configuration .json file can be used in combination with a vcf.report.template to create reports that e.g. show specific variant content or variant filters. The allowed contents of a configuration .json file depend on the vcf.report.template used. For the default vcf.report.template the configuration options are described here .","title":"Report templates"},{"location":"advanced/report_templates/#report-templates","text":"VIP outputs a standalone HTML report that can be viewed in any modern browser. The report is based on the input sample sheet information and the output variant vcf data.","title":"Report templates"},{"location":"advanced/report_templates/#default","text":"As a default VIP uses a report template that is suitable for most analyses: Above: default report template","title":"Default"},{"location":"advanced/report_templates/#customization","text":"Using the vcf.report.template parameter (see here ) it is possible to specify a different report template to create reports tailored to your needs. The following repositories might be of interest when creating a new report template: vip-report-api vip-report-template vip-report-vcf vite-plugin-inline The vip-report tool creates reports based on a report template as described in the following repositories: vip-report vip-utils","title":"Customization"},{"location":"advanced/report_templates/#configuration","text":"A configuration .json file can be used in combination with a vcf.report.template to create reports that e.g. show specific variant content or variant filters. The allowed contents of a configuration .json file depend on the vcf.report.template used. 
For the default vcf.report.template the configuration options are described here .","title":"Configuration"},{"location":"examples/multi-project/","text":"Multi-project \u00b6 VIP can be used to analyse different projects in one run, producing output files per project. To achieve this you just need to specify different projects in one samplesheet. project_id family_id individual_id paternal_id maternal_id sex affected proband sequencing_platform fastq fastq_r1 fastq_r2 vip0 fam0 individual0 individual1 male true true nanopore path/to/vip0.fastq.gz vip0 fam0 individual1 female false false nanopore path/to/vip1.fastq.gz vip1 fam1 individual2 individual3 individual4 male false false pacbio_hifi path/to/vip2.fastq.gz vip1 fam1 individual3 male false false pacbio_hifi path/to/vip3.fastq.gz vip1 fam1 individual4 female false true pacbio_hifi path/to/vip4.fastq.gz vip2 fam2 individual5 male true true illumina /vip5_1.fastq.gz /vip5_2.fastq.gz Run the pipeline \u00b6 cd vip vip --workflow fastq --input path/to/samplesheet.tsv --output path/to/output/folder For a working example on how to generate output for multiple projects see here .","title":"Multi-project"},{"location":"examples/multi-project/#multi-project","text":"VIP can be used to analyse different projects in one run, producing output files per project. To achieve this you just need to specify different projects in one samplesheet. 
project_id family_id individual_id paternal_id maternal_id sex affected proband sequencing_platform fastq fastq_r1 fastq_r2 vip0 fam0 individual0 individual1 male true true nanopore path/to/vip0.fastq.gz vip0 fam0 individual1 female false false nanopore path/to/vip1.fastq.gz vip1 fam1 individual2 individual3 individual4 male false false pacbio_hifi path/to/vip2.fastq.gz vip1 fam1 individual3 male false false pacbio_hifi path/to/vip3.fastq.gz vip1 fam1 individual4 female false true pacbio_hifi path/to/vip4.fastq.gz vip2 fam2 individual5 male true true illumina /vip5_1.fastq.gz /vip5_2.fastq.gz","title":"Multi-project"},{"location":"examples/multi-project/#run-the-pipeline","text":"cd vip vip --workflow fastq --input path/to/samplesheet.tsv --output path/to/output/folder For a working example on how to generate output for multiple projects see here .","title":"Run the pipeline"},{"location":"examples/nanopore/","text":"Nanopore \u00b6 To run vip with nanopore data, just specify nanopore as the sequencing_platform in your sample sheet. The other options for this field are \"illumina\" and \"pacbio_hifi\" and can be used in a similar manner. Samplesheet \u00b6 See an example for the samplesheet below, the example shows the samplesheet for a run starting from the cram, but the 'sequencing_platform' can also be used to achieve the same for a run with the fastq workflow. individual_id sequencing_platform cram your_sample_id nanopore path/to/your/nanopore.cram Run the pipeline \u00b6 cd vip vip --workflow cram --input path/to/samplesheet.tsv --output path/to/output/folder For an example on how to generate output for FASTQ files using the Oxford Nanopore platform see here .","title":"Nanopore"},{"location":"examples/nanopore/#nanopore","text":"To run vip with nanopore data, just specify nanopore as the sequencing_platform in your sample sheet. 
The other options for this field are \"illumina\" and \"pacbio_hifi\" and can be used in a similar manner.","title":"Nanopore"},{"location":"examples/nanopore/#samplesheet","text":"See an example for the samplesheet below, the example shows the samplesheet for a run starting from the cram, but the 'sequencing_platform' can also be used to achieve the same for a run with the fastq workflow. individual_id sequencing_platform cram your_sample_id nanopore path/to/your/nanopore.cram","title":"Samplesheet"},{"location":"examples/nanopore/#run-the-pipeline","text":"cd vip vip --workflow cram --input path/to/samplesheet.tsv --output path/to/output/folder For an example on how to generate output for FASTQ files using the Oxford Nanopore platform see here .","title":"Run the pipeline"},{"location":"examples/reanalysis/","text":"Reanalysis \u00b6 The VCF workflow can be used to reanalyse data from previous runs with the pipeline. It is possible to start from the normalize, annotate, classify, filter, inheritance, classify_samples, filter_samples steps, this can for example be useful if you update one of your decision trees, or if you wish to re-run the inheritance matching with a different set of low-penetrance genes. For reanalysis the basics of running VIP remain the same, however the correct intermediate file should be provided as input in the sample sheet. Several intermediate results are available in the \"intermediates\" subfolder of your output folder. Furthermore the step from which you wish to start should be added in the configuration parameter \"vcf.start\".","title":"Reanalysis"},{"location":"examples/reanalysis/#reanalysis","text":"The VCF workflow can be used to reanalyse data from previous runs with the pipeline. 
It is possible to start from the normalize, annotate, classify, filter, inheritance, classify_samples, filter_samples steps, this can for example be useful if you update one of your decision trees, or if you wish to re-run the inheritance matching with a different set of low-penetrance genes. For reanalysis the basics of running VIP remain the same, however the correct intermediate file should be provided as input in the sample sheet. Several intermediate results are available in the \"intermediates\" subfolder of your output folder. Furthermore the step from which you wish to start should be added in the configuration parameter \"vcf.start\".","title":"Reanalysis"},{"location":"get_started/installation/","text":"Installation \u00b6 git clone https://github.com/molgenis/vip bash vip/install.sh","title":"Installation"},{"location":"get_started/installation/#installation","text":"git clone https://github.com/molgenis/vip bash vip/install.sh","title":"Installation"},{"location":"get_started/requirements/","text":"Requirements \u00b6 Before installing VIP please check whether your system meets the following requirements: GNU-based Linux (e.g. Ubuntu, Windows Subsystem for Linux ) with x86_64 architecture Bash \u2265 3.2 Java \u2265 11 Apptainer (setuid installation) 8GB RAM 1 220GB disk space 1) The memory requirements differ per workflow and depend on the size of your input data, the scheduler that you use, and the amount of parallelization. For example, executing VIP using a job scheduler will reduce the memory requirements on the system submitting the jobs to 1-2GB. Optional \u00b6 VIP auto-detects whether Slurm is available on the system and, if available, will schedule its jobs with Slurm. Otherwise, the jobs will be submitted on the local system.","title":"Requirements"},{"location":"get_started/requirements/#requirements","text":"Before installing VIP please check whether your system meets the following requirements: GNU-based Linux (e.g. 
Ubuntu, Windows Subsystem for Linux ) with x86_64 architecture Bash \u2265 3.2 Java \u2265 11 Apptainer (setuid installation) 8GB RAM 1 220GB disk space 1) The memory requirements differ per workflow and depend, on the size of your input data, the scheduler that you use, the amount of parallelization. For example, executing VIP using a job scheduler will reduce the memory requirements on the system submitting the jobs to 1-2GB.","title":"Requirements"},{"location":"get_started/requirements/#optional","text":"VIP auto-detects whether Slurm is available on the system and, if available, will schedule its jobs with Slurm. Otherwise, the jobs will be submitted on the local system.","title":"Optional"},{"location":"get_started/start_running/","text":"Start running \u00b6 After installation, it is time for a quick test to verify that VIP works using some test data. Input \u00b6 To run VIP you need to provide at least workflow , input and output arguments (described in detail here ). The following example processes a collection of .vcf files. cd vip vip --workflow vcf --input test/resources/multiproject.tsv --output output_multiproject Output \u00b6 Executing the above command displays progress until the pipeline completes. 
N E X T F L O W ~ version 22.10.6 Launching `vip_vcf.nf` [disturbed_khorana] DSL2 - revision: 8f8c80809c executor > local (27) [- ] process > samtools_index - [71/4bb8b5] process > vcf:convert (2) [100%] 5 of 5 \u2714 [c7/1f8dc7] process > vcf:index (1) [100%] 1 of 1 \u2714 [ad/51639f] process > vcf:stats (1) [100%] 2 of 2 \u2714 [54/a6c17d] process > vcf:merge_vcf (1) [100%] 1 of 1 \u2714 [a5/790ba1] process > vcf:merge_gvcf (1) [100%] 1 of 1 \u2714 [- ] process > vcf:split - [64/dafd8f] process > vcf:normalize (2) [100%] 2 of 2 \u2714 [c4/ed6e06] process > vcf:annotate (1) [100%] 2 of 2 \u2714 [43/c63075] process > vcf:classify (2) [100%] 2 of 2 \u2714 [66/3adcef] process > vcf:filter (2) [100%] 2 of 2 \u2714 [d1/1d89ee] process > vcf:inheritance (1) [100%] 2 of 2 \u2714 [d7/d717a0] process > vcf:classify_samples (1) [100%] 2 of 2 \u2714 [45/0564f9] process > vcf:filter_samples (1) [100%] 2 of 2 \u2714 [- ] process > vcf:concat - [- ] process > vcf:slice - [ad/fc2b6c] process > vcf:report (2) [100%] 3 of 3 \u2714 Duration : 1m 00s CPU hours : 0.2 Succeeded : 27 Results \u00b6 ls -1 output_multiproject/ The output folder contains one report for each project described in test/resources/multiproject.tsv . intermediates nxf_report.html nxf_timeline.html vip0.html vip0.vcf.gz vip0.vcf.gz.csi vip1.html vip1.vcf.gz vip1.vcf.gz.csi vip2.html vip2.vcf.gz vip2.vcf.gz.csi The files vip0.html , vip1.html and vip2.html can be opened in your browser and display an interactive report based on the corresponding .vcf.gz output files. The outputs are described in more detail here .","title":"Start running"},{"location":"get_started/start_running/#start-running","text":"After installation, it is time for a quick test to verify that VIP works using some test data.","title":"Start running"},{"location":"get_started/start_running/#input","text":"To run VIP you need to provide at least workflow , input and output arguments (described in detail here ). 
The following example processes a collection of .vcf files. cd vip vip --workflow vcf --input test/resources/multiproject.tsv --output output_multiproject","title":"Input"},{"location":"get_started/start_running/#output","text":"Executing the above command displays progress until the pipeline completes. N E X T F L O W ~ version 22.10.6 Launching `vip_vcf.nf` [disturbed_khorana] DSL2 - revision: 8f8c80809c executor > local (27) [- ] process > samtools_index - [71/4bb8b5] process > vcf:convert (2) [100%] 5 of 5 \u2714 [c7/1f8dc7] process > vcf:index (1) [100%] 1 of 1 \u2714 [ad/51639f] process > vcf:stats (1) [100%] 2 of 2 \u2714 [54/a6c17d] process > vcf:merge_vcf (1) [100%] 1 of 1 \u2714 [a5/790ba1] process > vcf:merge_gvcf (1) [100%] 1 of 1 \u2714 [- ] process > vcf:split - [64/dafd8f] process > vcf:normalize (2) [100%] 2 of 2 \u2714 [c4/ed6e06] process > vcf:annotate (1) [100%] 2 of 2 \u2714 [43/c63075] process > vcf:classify (2) [100%] 2 of 2 \u2714 [66/3adcef] process > vcf:filter (2) [100%] 2 of 2 \u2714 [d1/1d89ee] process > vcf:inheritance (1) [100%] 2 of 2 \u2714 [d7/d717a0] process > vcf:classify_samples (1) [100%] 2 of 2 \u2714 [45/0564f9] process > vcf:filter_samples (1) [100%] 2 of 2 \u2714 [- ] process > vcf:concat - [- ] process > vcf:slice - [ad/fc2b6c] process > vcf:report (2) [100%] 3 of 3 \u2714 Duration : 1m 00s CPU hours : 0.2 Succeeded : 27","title":"Output"},{"location":"get_started/start_running/#results","text":"ls -1 output_multiproject/ The output folder contains one report for each project described in test/resources/multiproject.tsv . intermediates nxf_report.html nxf_timeline.html vip0.html vip0.vcf.gz vip0.vcf.gz.csi vip1.html vip1.vcf.gz vip1.vcf.gz.csi vip2.html vip2.vcf.gz vip2.vcf.gz.csi The files vip0.html , vip1.html and vip2.html can be opened in your browser and display an interactive report based on the corresponding .vcf.gz output files. 
The outputs are described in more detail here .","title":"Results"},{"location":"help/frequently_asked_questions/","text":"Frequently asked questions \u00b6 Why doesn't my report contain any variants? \u00b6 VIP filters your input variants using classification trees for variant-effect and variant-sample combinations. Usually if your report doesn't contain any records this implies that they were filtered out based on these trees. Inspect the _classifications.vcf.gz files in the intermediates output folder to determine why a variant record was removed. Why does VIP fail with an Unexpected Error [InvocationTargetException] ? \u00b6 This issue can mean a number of things, check the .nxf.log for more details. One of the causes is a mismatch between the reference genome that was used to call the variants in your .vcf file and the reference genome used by VIP. For example: Your variants are called with a reference genome that differs from the default VIP reference genome Your variants are called with GRCh37 and you use the GRCh38 assembly or vice-versa Why does VIP fail with a file not found error but my file exists? \u00b6 You might need to update APPTAINER_BIND , for more details see here . To understand the cause of this issue take a look at the Apptainer documentation . Why does VIP fail with an exit code 137? \u00b6 A process has run out of memory. See the config documentation on how to update resource assignments for some or all processes. Why does the genome browser in the report not show all the reads for my structural variant? \u00b6 Since structural variants can be very large it is not possible to keep all reads for these variants in the report. 
The Cram file with all reads is produced as an intermediate result of VIP, and can be viewed using the desktop version of IGV .","title":"Frequently asked questions"},{"location":"help/frequently_asked_questions/#frequently-asked-questions","text":"","title":"Frequently asked questions"},{"location":"help/frequently_asked_questions/#why-doesnt-my-report-contain-any-variants","text":"VIP filters your input variants using classification trees for variant-effect and variant-sample combinations. Usually if your report doesn't contain any records this implies that they were filtered out based on these trees. Inspect the _classifications.vcf.gz files in the intermediates output folder to determine why a variant record was removed.","title":"Why doesn't my report contain any variants?"},{"location":"help/frequently_asked_questions/#why-does-vip-fail-with-an-unexpected-error-invocationtargetexception","text":"This issue can mean a number of things, check the .nxf.log for more details. One of the causes is a mismatch between the reference genome that was used to call the variants in your .vcf file and the reference genome used by VIP. For example: Your variants are called with a reference genome that differs from the default VIP reference genome Your variants are called with GRCh37 and you use the GRCh38 assembly or vice-versa","title":"Why does VIP fail with an Unexpected Error [InvocationTargetException]?"},{"location":"help/frequently_asked_questions/#why-does-vip-fail-with-a-file-not-found-error-but-my-file-exists","text":"You might need to update APPTAINER_BIND , for more details see here . To understand the cause of this issue take a look at the Apptainer documentation .","title":"Why does VIP fail with a file not found error but my file exists?"},{"location":"help/frequently_asked_questions/#why-does-vip-fail-with-an-exit-code-137","text":"A process has run out of memory. 
See the config documentation on how to update resource assignments for some or all processes.","title":"Why does VIP fail with an exit code 137?"},{"location":"help/frequently_asked_questions/#why-does-the-genome-browser-in-the-report-not-show-all-the-reads-for-my-structural-variant","text":"Since structural variants can be very large it is not possible to keep all reads for these variants in the report. The Cram file with all reads is produced as an intermediate result of VIP, and can be viewed using the desktop version of IGV .","title":"Why does the genome browser in the report not show all the reads for my structural variant?"},{"location":"help/issues/","text":"Issues \u00b6 Please use this link to report issues or ask questions. We do not have an e-mail address, forum or community chat at the moment. Known issues might be located in one of our VIP repositories: vip vip-decision-tree vip-inheritance vip-inheritance-matcher vip-report vip-report-api vip-report-template vip-report-vcf vip-utils capice vite-plugin-inline","title":"Issues"},{"location":"help/issues/#issues","text":"Please use this link to report issues or ask questions. We do not have an e-mail address, forum or community chat at the moment. Known issues might be located in one of our VIP repositories: vip vip-decision-tree vip-inheritance vip-inheritance-matcher vip-report vip-report-api vip-report-template vip-report-vcf vip-utils capice vite-plugin-inline","title":"Issues"},{"location":"home/key_features/","text":"Key features \u00b6 VIP is an easy to install, easy to use, portable and flexible pipeline implemented using Nextflow . 
Features include: Workflows for a broad range of input file types: bam , cram , fastq , g.vcf , vcf Produces stand-alone variant interpretation HTML report with integrated genome browser Long-read sequencing support (Oxford Nanopore, PacBio HiFi) Short-read sequencing support (Illumina, both single and paired-end reads) Supports GRCh38, supports GRCh37 and T2T via liftover Supports multiallelic variants Short variant detection Structural variant detection Short tandem repeat detection Copy number variant detection (Oxford Nanopore, PacBio HiFi) Consequence aware Rich set of variant annotations Pathogenic variant prioritization (CAPICE) Phenotype support (HPO) Inheritance matching (VIP inheritance matcher) Variant classification and filtration using customizable decision trees Variant reporting using customizable report templates Quick reanalysis","title":"Key features"},{"location":"home/key_features/#key-features","text":"VIP is an easy to install, easy to use, portable and flexible pipeline implemented using Nextflow . 
Features include: Workflows for a broad range of input file types: bam , cram , fastq , g.vcf , vcf Produces stand-alone variant interpretation HTML report with integrated genome browser Long-read sequencing support (Oxford Nanopore, PacBio HiFi) Short-read sequencing support (Illumina, both single and paired-end reads) Supports GRCh38, supports GRCh37 and T2T via liftover Supports multiallelic variants Short variant detection Structural variant detection Short tandem repeat detection Copy number variant detection (Oxford Nanopore, PacBio HiFi) Consequence aware Rich set of variant annotations Pathogenic variant prioritization (CAPICE) Phenotype support (HPO) Inheritance matching (VIP inheritance matcher) Variant classification and filtration using customizable decision trees Variant reporting using customizable report templates Quick reanalysis","title":"Key features"},{"location":"usage/command-line-options/","text":"Command-line options \u00b6 The vip command takes input vcf/cram/fastq data and produces a filtered annotated .vcf.gz containing candidate variants of interest. In addition to the .vcf.gz an interactive .html report is produced that can be displayed in any modern web browser. vip --help prints the available command-line options: usage: vip -w -i -o -w, --workflow workflow to execute. allowed values: cram, fastq, gvcf, vcf -i, --input path to sample sheet .tsv -o, --output output folder -c, --config path to additional nextflow .cfg (optional) -p, --profile nextflow configuration profile (optional) -r, --resume resume execution using cached results (default: false) -h, --help print this message and exit Required \u00b6 workflow as described here input as described here output as described here Optional \u00b6 config as described here profile the configuration profile to use. 
allowed values are local , slurm plus any profiles added in --config resume useful to continue executions that was stopped by an error using cached results Defaults \u00b6 By default vip : Assumes an Illumina sequencing platform was used to generate the input data Assumes whole-genome sequencing (WGS) method was used to generate the input data Uses a GRCh38 reference genome ( GCA_000001405.15 / GCF_000001405.26 ) Provides classification trees for default variant filtration. For details, see here Creates reports using a default report template. For details, see here","title":"Command-line options"},{"location":"usage/command-line-options/#command-line-options","text":"The vip command takes input vcf/cram/fastq data and produces a filtered annotated .vcf.gz containing candidate variants of interest. In addition to the .vcf.gz an interactive .html report is produced that can be displayed in any modern web browser. vip --help prints the available command-line options: usage: vip -w -i -o -w, --workflow workflow to execute. allowed values: cram, fastq, gvcf, vcf -i, --input path to sample sheet .tsv -o, --output output folder -c, --config path to additional nextflow .cfg (optional) -p, --profile nextflow configuration profile (optional) -r, --resume resume execution using cached results (default: false) -h, --help print this message and exit","title":"Command-line options"},{"location":"usage/command-line-options/#required","text":"workflow as described here input as described here output as described here","title":"Required"},{"location":"usage/command-line-options/#optional","text":"config as described here profile the configuration profile to use. 
allowed values are local , slurm plus any profiles added in --config resume useful to continue executions that was stopped by an error using cached results","title":"Optional"},{"location":"usage/command-line-options/#defaults","text":"By default vip : Assumes an Illumina sequencing platform was used to generate the input data Assumes whole-genome sequencing (WGS) method was used to generate the input data Uses a GRCh38 reference genome ( GCA_000001405.15 / GCF_000001405.26 ) Provides classification trees for default variant filtration. For details, see here Creates reports using a default report template. For details, see here","title":"Defaults"},{"location":"usage/config/","text":"Config \u00b6 The VIP configuration is stored in Nextflow configuration files. An additional configuration file can be supplied on the command-line to overwrite default parameter values, add/update profiles, configure processes and update environment variables. Parameters \u00b6 key default description assembly GRCh38 output assembly, allowed values: [GRCh38] GRCh37.reference.chain.GRCh38 installed chain file to convert GRCh37 to GRCh38 data GRCh37.reference.fasta installed GRCh37.reference.fastaFai installed GRCh37.reference.fastaGzi installed GRCh38.reference.fasta installed GCA_000001405.15_GRCh38_no_alt_analysis_set GRCh38.reference.fastaFai installed GRCh38.reference.fastaGzi installed T2T.reference.chain.GRCh38 installed chain file to convert T2T to GRCh38 data T2T.reference.fasta T2T.reference.fastaFai T2T.reference.fastaGzi pcr_performed false Indication if PCR was performed to get the data, if so certain tools will be disabled due to not being compatible with this data. Warning: Please take note of the fact that for a different reference fasta.gz the unzipped referenfasta file is also required. Both the zipped and unzipped fasta should have an index. 
FASTQ \u00b6 key default description GRCh38.reference.fastaMmi installed for details, see here fastp.options for details, see here minimap2.soft_clipping true In SAM output, use soft clipping for supplementary alignments (required when STR calling with Straglr) minimap2.nanopore_preset lr:hq Preset to use for aligning Nanopore data, options: 'lr:hq' 'map-ont'. CRAM \u00b6 key default description cnv.spectre.GRCh38.blacklist installed blacklist in bed format for sites that will be ignored cnv.spectre.GRCh38.metadata installed metadata file for Ns removal, update this file only when using a different GRCh38 version than the one provided by VIP. cram.call_snv true enable/disable the detection of short variants cram.call_str true enable/disable the detection of short tandem repeats cram.call_sv true enable/disable the detection of structural variants. disable this manually in case of non-paired-end Illumina data. snv.deeptrio.illumina.WES.model_name WES for details, see here snv.deeptrio.illumina.WGS.model_name WGS for details, see here snv.deeptrio.nanopore.model_name ONT for details, see here snv.deeptrio.pacbio_hifi.model_name PACBIO for details, see here snv.deepvariant.illumina.WES.model_name WES for details, see here snv.deepvariant.illumina.WGS.model_name WGS for details, see here snv.deepvariant.nanopore.model_name ONT_R104 for details, see here snv.deepvariant.pacbio_hifi.model_name PACBIO for details, see here snv.glnexus.WES.preset DeepVariantWES for details, see here . allowed values: [DeepVariant, DeepVariantWES, DeepVariantWES_MED_DP, DeepVariant_unfiltered] snv.glnexus.WGS.preset DeepVariantWGS for details, see here . allowed values: [DeepVariant, DeepVariantWGS, DeepVariant_unfiltered] str.expansionhunter.aligner dag-aligner for details, see here . allowed values: [dag-aligner, path-aligner] str.expansionhunter.analysis_mode streaming for details, see here . allowed values: [seeking , streaming] str.expansionhunter.log_level warn for details, see here . 
allowed values: [trace, debug, info, warn, or error] str.expansionhunter.region_extension_length 1000 for details, see here str.expansionhunter.GRCh38.variant_catalog installed for details, see here str.straglr.min_support 2 minimum number of support reads for an expansion to be captured in genome-scan, see here str.straglr.min_cluster_size 2 minimum number of reads required to constitute a cluster (allele) in GMM clustering, see here str.straglr.GRCh38.loci installed from here sv.cutesv.batches 10000000 Batch of genome segmentation interval sv.cutesv.gt_round 500 Maximum round of iteration for alignments searching if perform genotyping sv.cutesv.include_bed Only detect SVs in regions in the BED file sv.cutesv.ivcf Enable to perform force calling using the given vcf file sv.cutesv.max_size 100000 Maximum size of SV to be reported. All SVs are reported when using -1 sv.cutesv.max_split_parts 7 Maximum number of split segments a read may be aligned before it is ignored. All split segments are considered when using -1. (Recommand -1 when applying assembly-based alignment.) sv.cutesv.merge_del_threshold 0 Maximum distance of deletion signals to be merged sv.cutesv.merge_ins_threshold 100 Maximum distance of insertion signals to be merged sv.cutesv.min_mapq 20 Minimum mapping quality value of alignment to be taken into account (recommend 10 for force calling) sv.cutesv.min_read_len 500 Ignores reads that only report alignments with not longer than bp sv.cutesv.min_siglength 10 Minimum length of SV signal to be extracted sv.cutesv.min_size 30 Minimum size of SV to be reported sv.cutesv.min_support 2 Minimum number of reads that support a SV to be reported. Please note that the default is lower than the default of cuteSV itself to prevent missed SV calls. 
sv.cutesv.read_range 1000 The interval range for counting reads distribution sv.cutesv.report_readid false Enable to report supporting read ids for each SV sv.cutesv.retain_work_dir false Enable to retain temporary folder and files sv.cutesv.write_old_sigs false Enable to output temporary sig files sv.cutesv.nanopore.diff_ratio_filtering_TRA 0.6 Filter breakpoints with basepair identity less than for translocation sv.cutesv.nanopore.diff_ratio_merging_DEL 0.3 Do not merge breakpoints with basepair identity more than for deletion sv.cutesv.nanopore.diff_ratio_merging_INS 0.3 Do not merge breakpoints with basepair identity more than for insertion sv.cutesv.nanopore.max_cluster_bias_DEL 100 Maximum distance to cluster read together for deletion sv.cutesv.nanopore.max_cluster_bias_DUP 500 Maximum distance to cluster read together for duplication sv.cutesv.nanopore.max_cluster_bias_INS 100 Maximum distance to cluster read together for insertion sv.cutesv.nanopore.max_cluster_bias_INV 500 Maximum distance to cluster read together for inversion sv.cutesv.nanopore.max_cluster_bias_TRA 50 Maximum distance to cluster read together for translocation sv.cutesv.nanopore.remain_reads_ratio 1.0 The ratio of reads remained in cluster. 
Set lower when the alignment data have high quality but recommand over 0.5 sv.cutesv.pacbio_hifi.diff_ratio_filtering_TRA 0.6 Filter breakpoints with basepair identity less than for translocation sv.cutesv.pacbio_hifi.diff_ratio_merging_DEL 0.5 Do not merge breakpoints with basepair identity more than for deletion sv.cutesv.pacbio_hifi.diff_ratio_merging_INS 0.9 Do not merge breakpoints with basepair identity more than for insertion sv.cutesv.pacbio_hifi.max_cluster_bias_DEL 1000 Maximum distance to cluster read together for deletion sv.cutesv.pacbio_hifi.max_cluster_bias_DUP 500 Maximum distance to cluster read together for duplication sv.cutesv.pacbio_hifi.max_cluster_bias_INS 1000 Maximum distance to cluster read together for insertion sv.cutesv.pacbio_hifi.max_cluster_bias_INV 500 Maximum distance to cluster read together for inversion sv.cutesv.pacbio_hifi.max_cluster_bias_TRA 50 Maximum distance to cluster read together for translocation sv.cutesv.pacbio_hifi.remain_reads_ratio 1.0 The ratio of reads remained in cluster. Set lower when the alignment data have high quality but recommand over 0.5 gVCF \u00b6 key default description gvcf.merge_preset DeepVariant allowed values: [gatk, gatk_unfiltered, DeepVariant, DeepVariant_unfiltered] VCF \u00b6 key default description vcf.start allowed values: [normalize, annotate, classify, filter, inheritance, classify_samples, filter_samples]. for reanalysis this defines from which step to start the workflow vcf.annotate.annotsv_cache_dir installed vcf.annotate.ensembl_gene_mapping installed vcf.annotate.vep_buffer_size 1000 for details, see here vcf.annotate.vep_cache_dir installed vcf.annotate.vep_plugin_dir installed vcf.annotate.vep_plugin_hpo installed vcf.annotate.vep_plugin_inheritance installed vcf.annotate.vep_plugin_vkgl_mode 1 allowed values: [0=full VKGL, 1=public VKGL]. 
update vcf.annotate.GRCh38.vep_plugin_vkgl accordingly vcf.annotate.GRCh38.capice_model installed vcf.annotate.GRCh38.stranger_catalog installed for details, see here vcf.annotate.GRCh38.vep_custom_phylop installed vcf.annotate.GRCh38.vep_plugin_clinvar installed vcf.annotate.GRCh38.vep_plugin_gnomad installed vcf.annotate.GRCh38.vep_plugin_green_db_enabled false enabling is only allowed for academic use, for details see here vcf.annotate.GRCh38.vep_plugin_green_db installed vcf.annotate.GRCh38.vep_plugin_spliceai_indel installed vcf.annotate.GRCh38.vep_plugin_spliceai_snv installed vcf.annotate.GRCh38.vep_plugin_utrannotator installed vcf.annotate.GRCh38.vep_plugin_vkgl installed update vcf.annotate.vep_plugin_vkgl_mode accordingly vcf.classify.annotate_path 1 allowed values: [0=false, 1=true]. annotate variant-consequences with classification tree path vcf.classify.GRCh38.decision_tree installed for details, see here vcf.classify_samples.annotate_path 1 allowed values: [0=false, 1=true]. annotate variant-consequences per sample with classification tree path vcf.classify_samples.GRCh38.decision_tree installed for details, see here vcf.filter.classes VUS,LP,P for details, see here vcf.filter.consequences true allowed values: [true, false]. true: filter individual consequences, false: keep all consequences for a variant if one consequence filter passes. vcf.filter_samples.classes U1,U2 for details, see here vcf.report.gado_genes installed vcf.report.gado_hpo installed vcf.report.gado_predict_info installed vcf.report.gado_predict_matrix installed vcf.report.include_crams true allowed values: [true, false]. true: include cram files in the report for showing alignments in the genome browser, false: do not include the crams in the report, no aligments are shown in the genome browser. This will result in a smaller report size. 
vcf.report.max_records vcf.report.max_samples vcf.report.config vcf.report.template configuration file vcf.report.template for details, see here vcf.report.GRCh38.genes installed Profiles \u00b6 VIP pre-defines two profiles. The default profile is Slurm with fallback to local in case Slurm cannot be discovered. key description local for details, see here slurm for details, see here Additional profiles (for details, see here ) can be added to your configuration file and used on the command-line, for example to run VIP on the Amazon, Azure or Google Cloud. Process \u00b6 By default, each process gets assigned 4 cpus , 8GB of memory and a max runtime of 4 hours . Depending on your system specifications and your analysis you might need to use updated values. For information on how to update process configuration see the Nextflow documentation . The following sections list all processes and their non-default configuration. FASTQ \u00b6 process label configuration concat_fastq default concat_fastq_paired_end default minimap2_align cpus=8 memory='16GB' time='23h' minimap2_align_paired_end cpus=8 memory='16GB' time='23h' CRAM \u00b6 process label configuration concat_vcf default cram_validate default cutesv_call cpus=4 memory='8GB' time='5h' deepvariant_call cpus= default memory='2GB * cpus' time='5h' deepvariant_call_duo cpus= default memory='4GB * cpus' time='5h' deepvariant_call_trio cpus= default memory='4GB * cpus' time='5h' deepvariant_concat_gvcf cpus= default memory='2GB' time='30m' deepvariant_concat_vcf cpus= default memory='2GB' time='30m' deepvariant_joint_call cpus= default memory='2GB' time='30m' expansionhunter_call cpus=4 memory='16GB' time='5h' manta_joint_call cpus=4 memory='8GB' time='5h' straglr_call default vcf_merge_str default vcf_merge_sv default gVCF \u00b6 process label configuration gvcf_liftover default gvcf_validate memory='100MB' time='30m' gvcf_merge memory='2GB' time='30m' VCF \u00b6 process label configuration vcf_annotate cpus=4 
memory='8GB' time='4h' vcf_annotate_publish default vcf_classify memory = '2GB' vcf_classify_publish default vcf_classify_samples memory = '2GB' vcf_classify_samples_publish default vcf_concat default vcf_filter default vcf_filter_samples default vcf_inheritance memory = '2GB' vcf_liftover default vcf_normalize default vcf_report memory = '4GB' vcf_slice default vcf_split memory='100MB' time='30m' vcf_validate memory='100MB' time='30m' Environment \u00b6 See https://github.com/molgenis/vip/tree/main/config for an overview of available environment variables. Notably this allows to use different Apptainer containers for the tools that VIP relies on.","title":"Config"},{"location":"usage/config/#config","text":"The VIP configuration is stored in Nextflow configuration files. An additional configuration file can be supplied on the command-line to overwrite default parameter values, add/update profiles, configure processes and update environment variables.","title":"Config"},{"location":"usage/config/#parameters","text":"key default description assembly GRCh38 output assembly, allowed values: [GRCh38] GRCh37.reference.chain.GRCh38 installed chain file to convert GRCh37 to GRCh38 data GRCh37.reference.fasta installed GRCh37.reference.fastaFai installed GRCh37.reference.fastaGzi installed GRCh38.reference.fasta installed GCA_000001405.15_GRCh38_no_alt_analysis_set GRCh38.reference.fastaFai installed GRCh38.reference.fastaGzi installed T2T.reference.chain.GRCh38 installed chain file to convert T2T to GRCh38 data T2T.reference.fasta T2T.reference.fastaFai T2T.reference.fastaGzi pcr_performed false Indication if PCR was performed to get the data, if so certain tools will be disabled due to not being compatible with this data. Warning: Please take note of the fact that for a different reference fasta.gz the unzipped referenfasta file is also required. 
Both the zipped and unzipped fasta should have an index.","title":"Parameters"},{"location":"usage/config/#fastq","text":"key default description GRCh38.reference.fastaMmi installed for details, see here fastp.options for details, see here minimap2.soft_clipping true In SAM output, use soft clipping for supplementary alignments (required when STR calling with Straglr) minimap2.nanopore_preset lr:hq Preset to use for aligning Nanopore data, options: 'lr:hq' 'map-ont'.","title":"FASTQ"},{"location":"usage/config/#cram","text":"key default description cnv.spectre.GRCh38.blacklist installed blacklist in bed format for sites that will be ignored cnv.spectre.GRCh38.metadata installed metadata file for Ns removal, update this file only when using a different GRCh38 version than the one provided by VIP. cram.call_snv true enable/disable the detection of short variants cram.call_str true enable/disable the detection of short tandem repeats cram.call_sv true enable/disable the detection of structural variants. disable this manually in case of non-paired-end Illumina data. snv.deeptrio.illumina.WES.model_name WES for details, see here snv.deeptrio.illumina.WGS.model_name WGS for details, see here snv.deeptrio.nanopore.model_name ONT for details, see here snv.deeptrio.pacbio_hifi.model_name PACBIO for details, see here snv.deepvariant.illumina.WES.model_name WES for details, see here snv.deepvariant.illumina.WGS.model_name WGS for details, see here snv.deepvariant.nanopore.model_name ONT_R104 for details, see here snv.deepvariant.pacbio_hifi.model_name PACBIO for details, see here snv.glnexus.WES.preset DeepVariantWES for details, see here . allowed values: [DeepVariant, DeepVariantWES, DeepVariantWES_MED_DP, DeepVariant_unfiltered] snv.glnexus.WGS.preset DeepVariantWGS for details, see here . allowed values: [DeepVariant, DeepVariantWGS, DeepVariant_unfiltered] str.expansionhunter.aligner dag-aligner for details, see here . 
allowed values: [dag-aligner, path-aligner] str.expansionhunter.analysis_mode streaming for details, see here . allowed values: [seeking , streaming] str.expansionhunter.log_level warn for details, see here . allowed values: [trace, debug, info, warn, or error] str.expansionhunter.region_extension_length 1000 for details, see here str.expansionhunter.GRCh38.variant_catalog installed for details, see here str.straglr.min_support 2 minimum number of support reads for an expansion to be captured in genome-scan, see here str.straglr.min_cluster_size 2 minimum number of reads required to constitute a cluster (allele) in GMM clustering, see here str.straglr.GRCh38.loci installed from here sv.cutesv.batches 10000000 Batch of genome segmentation interval sv.cutesv.gt_round 500 Maximum round of iteration for alignments searching if perform genotyping sv.cutesv.include_bed Only detect SVs in regions in the BED file sv.cutesv.ivcf Enable to perform force calling using the given vcf file sv.cutesv.max_size 100000 Maximum size of SV to be reported. All SVs are reported when using -1 sv.cutesv.max_split_parts 7 Maximum number of split segments a read may be aligned before it is ignored. All split segments are considered when using -1. (Recommand -1 when applying assembly-based alignment.) sv.cutesv.merge_del_threshold 0 Maximum distance of deletion signals to be merged sv.cutesv.merge_ins_threshold 100 Maximum distance of insertion signals to be merged sv.cutesv.min_mapq 20 Minimum mapping quality value of alignment to be taken into account (recommend 10 for force calling) sv.cutesv.min_read_len 500 Ignores reads that only report alignments with not longer than bp sv.cutesv.min_siglength 10 Minimum length of SV signal to be extracted sv.cutesv.min_size 30 Minimum size of SV to be reported sv.cutesv.min_support 2 Minimum number of reads that support a SV to be reported. Please note that the default is lower than the default of cuteSV itself to prevent missed SV calls. 
sv.cutesv.read_range 1000 The interval range for counting reads distribution sv.cutesv.report_readid false Enable to report supporting read ids for each SV sv.cutesv.retain_work_dir false Enable to retain temporary folder and files sv.cutesv.write_old_sigs false Enable to output temporary sig files sv.cutesv.nanopore.diff_ratio_filtering_TRA 0.6 Filter breakpoints with basepair identity less than for translocation sv.cutesv.nanopore.diff_ratio_merging_DEL 0.3 Do not merge breakpoints with basepair identity more than for deletion sv.cutesv.nanopore.diff_ratio_merging_INS 0.3 Do not merge breakpoints with basepair identity more than for insertion sv.cutesv.nanopore.max_cluster_bias_DEL 100 Maximum distance to cluster read together for deletion sv.cutesv.nanopore.max_cluster_bias_DUP 500 Maximum distance to cluster read together for duplication sv.cutesv.nanopore.max_cluster_bias_INS 100 Maximum distance to cluster read together for insertion sv.cutesv.nanopore.max_cluster_bias_INV 500 Maximum distance to cluster read together for inversion sv.cutesv.nanopore.max_cluster_bias_TRA 50 Maximum distance to cluster read together for translocation sv.cutesv.nanopore.remain_reads_ratio 1.0 The ratio of reads remained in cluster. 
Set lower when the alignment data have high quality but recommand over 0.5 sv.cutesv.pacbio_hifi.diff_ratio_filtering_TRA 0.6 Filter breakpoints with basepair identity less than for translocation sv.cutesv.pacbio_hifi.diff_ratio_merging_DEL 0.5 Do not merge breakpoints with basepair identity more than for deletion sv.cutesv.pacbio_hifi.diff_ratio_merging_INS 0.9 Do not merge breakpoints with basepair identity more than for insertion sv.cutesv.pacbio_hifi.max_cluster_bias_DEL 1000 Maximum distance to cluster read together for deletion sv.cutesv.pacbio_hifi.max_cluster_bias_DUP 500 Maximum distance to cluster read together for duplication sv.cutesv.pacbio_hifi.max_cluster_bias_INS 1000 Maximum distance to cluster read together for insertion sv.cutesv.pacbio_hifi.max_cluster_bias_INV 500 Maximum distance to cluster read together for inversion sv.cutesv.pacbio_hifi.max_cluster_bias_TRA 50 Maximum distance to cluster read together for translocation sv.cutesv.pacbio_hifi.remain_reads_ratio 1.0 The ratio of reads remained in cluster. Set lower when the alignment data have high quality but recommand over 0.5","title":"CRAM"},{"location":"usage/config/#gvcf","text":"key default description gvcf.merge_preset DeepVariant allowed values: [gatk, gatk_unfiltered, DeepVariant, DeepVariant_unfiltered]","title":"gVCF"},{"location":"usage/config/#vcf","text":"key default description vcf.start allowed values: [normalize, annotate, classify, filter, inheritance, classify_samples, filter_samples]. for reanalysis this defines from which step to start the workflow vcf.annotate.annotsv_cache_dir installed vcf.annotate.ensembl_gene_mapping installed vcf.annotate.vep_buffer_size 1000 for details, see here vcf.annotate.vep_cache_dir installed vcf.annotate.vep_plugin_dir installed vcf.annotate.vep_plugin_hpo installed vcf.annotate.vep_plugin_inheritance installed vcf.annotate.vep_plugin_vkgl_mode 1 allowed values: [0=full VKGL, 1=public VKGL]. 
update vcf.annotate.GRCh38.vep_plugin_vkgl accordingly vcf.annotate.GRCh38.capice_model installed vcf.annotate.GRCh38.stranger_catalog installed for details, see here vcf.annotate.GRCh38.vep_custom_phylop installed vcf.annotate.GRCh38.vep_plugin_clinvar installed vcf.annotate.GRCh38.vep_plugin_gnomad installed vcf.annotate.GRCh38.vep_plugin_green_db_enabled false enabling is only allowed for academic use, for details see here vcf.annotate.GRCh38.vep_plugin_green_db installed vcf.annotate.GRCh38.vep_plugin_spliceai_indel installed vcf.annotate.GRCh38.vep_plugin_spliceai_snv installed vcf.annotate.GRCh38.vep_plugin_utrannotator installed vcf.annotate.GRCh38.vep_plugin_vkgl installed update vcf.annotate.vep_plugin_vkgl_mode accordingly vcf.classify.annotate_path 1 allowed values: [0=false, 1=true]. annotate variant-consequences with classification tree path vcf.classify.GRCh38.decision_tree installed for details, see here vcf.classify_samples.annotate_path 1 allowed values: [0=false, 1=true]. annotate variant-consequences per sample with classification tree path vcf.classify_samples.GRCh38.decision_tree installed for details, see here vcf.filter.classes VUS,LP,P for details, see here vcf.filter.consequences true allowed values: [true, false]. true: filter individual consequences, false: keep all consequences for a variant if one consequence filter passes. vcf.filter_samples.classes U1,U2 for details, see here vcf.report.gado_genes installed vcf.report.gado_hpo installed vcf.report.gado_predict_info installed vcf.report.gado_predict_matrix installed vcf.report.include_crams true allowed values: [true, false]. true: include cram files in the report for showing alignments in the genome browser, false: do not include the crams in the report, no aligments are shown in the genome browser. This will result in a smaller report size. 
vcf.report.max_records vcf.report.max_samples vcf.report.config vcf.report.template configuration file vcf.report.template for details, see here vcf.report.GRCh38.genes installed","title":"VCF"},{"location":"usage/config/#profiles","text":"VIP pre-defines two profiles. The default profile is Slurm with fallback to local in case Slurm cannot be discovered. key description local for details, see here slurm for details, see here Additional profiles (for details, see here ) can be added to your configuration file and used on the command-line, for example to run VIP on the Amazon, Azure or Google Cloud.","title":"Profiles"},{"location":"usage/config/#process","text":"By default, each process gets assigned 4 cpus , 8GB of memory and a max runtime of 4 hours . Depending on your system specifications and your analysis you might need to use updated values. For information on how to update process configuration see the Nextflow documentation . The following sections list all processes and their non-default configuration.","title":"Process"},{"location":"usage/config/#fastq_1","text":"process label configuration concat_fastq default concat_fastq_paired_end default minimap2_align cpus=8 memory='16GB' time='23h' minimap2_align_paired_end cpus=8 memory='16GB' time='23h'","title":"FASTQ"},{"location":"usage/config/#cram_1","text":"process label configuration concat_vcf default cram_validate default cutesv_call cpus=4 memory='8GB' time='5h' deepvariant_call cpus= default memory='2GB * cpus' time='5h' deepvariant_call_duo cpus= default memory='4GB * cpus' time='5h' deepvariant_call_trio cpus= default memory='4GB * cpus' time='5h' deepvariant_concat_gvcf cpus= default memory='2GB' time='30m' deepvariant_concat_vcf cpus= default memory='2GB' time='30m' deepvariant_joint_call cpus= default memory='2GB' time='30m' expansionhunter_call cpus=4 memory='16GB' time='5h' manta_joint_call cpus=4 memory='8GB' time='5h' straglr_call default vcf_merge_str default vcf_merge_sv 
default","title":"CRAM"},{"location":"usage/config/#gvcf_1","text":"process label configuration gvcf_liftover default gvcf_validate memory='100MB' time='30m' gvcf_merge memory='2GB' time='30m'","title":"gVCF"},{"location":"usage/config/#vcf_1","text":"process label configuration vcf_annotate cpus=4 memory='8GB' time='4h' vcf_annotate_publish default vcf_classify memory = '2GB' vcf_classify_publish default vcf_classify_samples memory = '2GB' vcf_classify_samples_publish default vcf_concat default vcf_filter default vcf_filter_samples default vcf_inheritance memory = '2GB' vcf_liftover default vcf_normalize default vcf_report memory = '4GB' vcf_slice default vcf_split memory='100MB' time='30m' vcf_validate memory='100MB' time='30m'","title":"VCF"},{"location":"usage/config/#environment","text":"See https://github.com/molgenis/vip/tree/main/config for an overview of available environment variables. Notably this allows to use different Apptainer containers for the tools that VIP relies on.","title":"Environment"},{"location":"usage/input/","text":"Input \u00b6 The --input value is a tab-separated file (sample-sheet) with each row describing the data and metadata of a sample. A minimal sample-sheet for the vcf workflow could look like this: individual_id vcf sample0 sample0.vcf.gz sample1 sample1.vcf.gz sample2 sample2.vcf.gz Sample-sheet values are case sensitive. Columns can contain values of different types: type description boolean allowed values: [ true , false ] enum categorical value file absolute file path or file path relative to the sample sheet file list comma-separated list of file paths string text string list comma-separated list of strings The following sections describe the columns that can be used in every sample-sheet followed by workflow specific columns. 
Columns \u00b6 column type required default description project_id string vip project identifier, see here family_id string fam family identifier individual_id string yes sample identifier of the individual paternal_id string sample identifier of the father maternal_id string sample identifier of the mother sex enum unknown sex values: [male,female] Please note that an unknown sex leads to a Spectre CNV analysis that assumes female for the ploidy determination of chromosome X. affected boolean unknown affected status whether the individual is affected proband boolean depends 1 individual being reported on hpo_ids string list regex: /HP:\\d{7}/ sequencing_method enum WGS allowed values: [ WES , WGS ], value must be the same for all project samples regions file allowed file extensions: [ bed ]. filter variants overlapping with regions in bed file 2 1 Exception: if no probands are defined in the sample-sheet then all samples are considered to be probands. Columns: FASTQ \u00b6 column type required default description adaptive_sampling file allowed file extensions: [ csv ]. for nanopore adaptive sampling experiments, used to filter stop_receiving reads fastq file list yes 3 allowed file extensions: [ fastq , fastq.gz , fq , fq.gz ]. single-reads file(s) fastq_r1 file list yes 3 allowed file extensions: [ fastq , fastq.gz , fq , fq.gz ]. paired-end reads file(s) #1 fastq_r2 file list yes 3 allowed file extensions: [ fastq , fastq.gz , fq , fq.gz ]. paired-end reads file(s) #2 sequencing_platform enum illumina allowed values: [ illumina , nanopore , pacbio_hifi ], value must be the same for all project samples 3 Either the fastq or the fastq_r1 and fastq_r2 are required. 
Columns: CRAM \u00b6 column type required default description cram file yes allowed file extensions: [ bam , cram , sam ] sequencing_platform enum illumina allowed values: [ illumina , nanopore , pacbio_hifi ], value must be the same for all project samples Columns: gVCF \u00b6 column type required default description assembly enum GRCh38 allowed values: [ GRCh37 , GRCh38 , T2T ] gvcf file yes allowed file extensions: [ gvcf , gvcf.gz , gvcf.bgz , vcf , vcf.gz , vcf.bgz , bcf , bcf.gz , bcf.bgz ] cram file allowed file extensions: [ bam , cram , sam ] Columns: VCF \u00b6 column type required default description assembly enum GRCh38 allowed values: [ GRCh37 , GRCh38 , T2T ], value must be the same for all project samples vcf file yes allowed file extensions: [ vcf , vcf.gz , vcf.bgz , bcf , bcf.gz , bcf.bgz ], value must be the same for all project samples cram file allowed file extensions: [ bam , cram , sam ]","title":"Input"},{"location":"usage/input/#input","text":"The --input value is a tab-separated file (sample-sheet) with each row describing the data and metadata of a sample. A minimal sample-sheet for the vcf workflow could look like this: individual_id vcf sample0 sample0.vcf.gz sample1 sample1.vcf.gz sample2 sample2.vcf.gz Sample-sheet values are case sensitive. 
Columns can contain values of different types: type description boolean allowed values: [ true , false ] enum categorical value file absolute file path or file path relative to the sample sheet file list comma-separated list of file paths string text string list comma-separated list of strings The following sections describe the columns that can be used in every sample-sheet followed by workflow specific columns.","title":"Input"},{"location":"usage/input/#columns","text":"column type required default description project_id string vip project identifier, see here family_id string fam family identifier individual_id string yes sample identifier of the individual paternal_id string sample identifier of the father maternal_id string sample identifier of the mother sex enum unknown sex values: [male,female] Please note that an unknown sex leads to a Spectre CNV analysis that assumes female for the ploidy determination of chromosome X. affected boolean unknown affected status whether the individual is affected proband boolean depends 1 individual being reported on hpo_ids string list regex: /HP:\\d{7}/ sequencing_method enum WGS allowed values: [ WES , WGS ], value must be the same for all project samples regions file allowed file extensions: [ bed ]. filter variants overlapping with regions in bed file 2 1 Exception: if no probands are defined in the sample-sheet then all samples are considered to be probands.","title":"Columns"},{"location":"usage/input/#columns-fastq","text":"column type required default description adaptive_sampling file allowed file extensions: [ csv ]. for nanopore adaptive sampling experiments, used to filter stop_receiving reads fastq file list yes 3 allowed file extensions: [ fastq , fastq.gz , fq , fq.gz ]. single-reads file(s) fastq_r1 file list yes 3 allowed file extensions: [ fastq , fastq.gz , fq , fq.gz ]. paired-end reads file(s) #1 fastq_r2 file list yes 3 allowed file extensions: [ fastq , fastq.gz , fq , fq.gz ]. 
paired-end reads file(s) #2 sequencing_platform enum illumina allowed values: [ illumina , nanopore , pacbio_hifi ], value must be the same for all project samples 3 Either the fastq or the fastq_r1 and fastq_r2 are required.","title":"Columns: FASTQ"},{"location":"usage/input/#columns-cram","text":"column type required default description cram file yes allowed file extensions: [ bam , cram , sam ] sequencing_platform enum illumina allowed values: [ illumina , nanopore , pacbio_hifi ], value must be the same for all project samples","title":"Columns: CRAM"},{"location":"usage/input/#columns-gvcf","text":"column type required default description assembly enum GRCh38 allowed values: [ GRCh37 , GRCh38 , T2T ] gvcf file yes allowed file extensions: [ gvcf , gvcf.gz , gvcf.bgz , vcf , vcf.gz , vcf.bgz , bcf , bcf.gz , bcf.bgz ] cram file allowed file extensions: [ bam , cram , sam ]","title":"Columns: gVCF"},{"location":"usage/input/#columns-vcf","text":"column type required default description assembly enum GRCh38 allowed values: [ GRCh37 , GRCh38 , T2T ], value must be the same for all project samples vcf file yes allowed file extensions: [ vcf , vcf.gz , vcf.bgz , bcf , bcf.gz , bcf.bgz ], value must be the same for all project samples cram file allowed file extensions: [ bam , cram , sam ]","title":"Columns: VCF"},{"location":"usage/output/","text":"Output \u00b6 Click here for a live example After VIP completes successfully the path specified by --output contains content similar to: .nextflow .nxf.home .nxf.log .nxf.tmp .nxf.work intermediates nxf_report.html nxf_timeline.html my_project_id.html my_project_id.vcf.gz my_project_id.vcf.gz.csi Report \u00b6 For each project defined in your --input sample-sheet a set of three files is created: my_project.html my_project.vcf.gz my_project.vcf.gz.csi In case no project identifiers were supplied these files will be called: vip.html vip.vcf.gz vip.vcf.gz.csi vip.html is an interactive report based on vip.vcf.gz that can be 
viewed in any modern browser vip.vcf.gz contains annotated candidate variants for interpretation vip.vcf.gz.csi is the corresponding index file By default, the report is a self-contained .html file that does not depend on external websites. All data and code to interact with and display this data is contained in one file. This ensures that no internet connection is required to view the report and enables easy sharing with other people. Live example #0 Live example #0 Live example #0 Above: report example Intermediates \u00b6 VIP publishes selected intermediate results to allow reanalysis using the vcf.start parameter . Additionaly these results can be used to understand why variant records did not make it into the report. The content of the intermediates directory depends on the used --workflow and looks similar to: hlhs_famA_grch38_annotations.vcf.gz hlhs_famA_grch38_annotations.vcf.gz.csi hlhs_famA_grch38_classifications.vcf.gz hlhs_famA_grch38_classifications.vcf.gz.csi hlhs_famA_grch38_famA_sample0_small_variants.vcf.gz hlhs_famA_grch38_famA_sample0_small_variants.vcf.gz.csi hlhs_famA_grch38_famA_sample0_sv.vcf.gz hlhs_famA_grch38_famA_sample0_sv.vcf.gz.csi hlhs_famA_grch38_famA_sample1_small_variants.vcf.gz hlhs_famA_grch38_famA_sample1_small_variants.vcf.gz.csi hlhs_famA_grch38_famA_sample1_sv.vcf.gz hlhs_famA_grch38_famA_sample1_sv.vcf.gz.csi hlhs_famA_grch38_famA_sample2_small_variants.vcf.gz hlhs_famA_grch38_famA_sample2_small_variants.vcf.gz.csi hlhs_famA_grch38_famA_sample2_sv.vcf.gz hlhs_famA_grch38_famA_sample2_sv.vcf.gz.csi Other \u00b6 Besides the result files and intermediate files the following data is generated: .nextflow .nxf.home .nxf.log .nxf.tmp .nxf.work nxf_report.html nxf_timeline.html For details, see the Nextflow documentation .","title":"Output"},{"location":"usage/output/#output","text":"Click here for a live example After VIP completes successfully the path specified by --output contains content similar to: .nextflow .nxf.home .nxf.log 
.nxf.tmp .nxf.work intermediates nxf_report.html nxf_timeline.html my_project_id.html my_project_id.vcf.gz my_project_id.vcf.gz.csi","title":"Output"},{"location":"usage/output/#report","text":"For each project defined in your --input sample-sheet a set of three files is created: my_project.html my_project.vcf.gz my_project.vcf.gz.csi In case no project identifiers were supplied these files will be called: vip.html vip.vcf.gz vip.vcf.gz.csi vip.html is an interactive report based on vip.vcf.gz that can be viewed in any modern browser vip.vcf.gz contains annotated candidate variants for interpretation vip.vcf.gz.csi is the corresponding index file By default, the report is a self-contained .html file that does not depend on external websites. All data and code to interact with and display this data is contained in one file. This ensures that no internet connection is required to view the report and enables easy sharing with other people. Live example #0 Live example #0 Live example #0 Above: report example","title":"Report"},{"location":"usage/output/#intermediates","text":"VIP publishes selected intermediate results to allow reanalysis using the vcf.start parameter . Additionaly these results can be used to understand why variant records did not make it into the report. 
The content of the intermediates directory depends on the used --workflow and looks similar to: hlhs_famA_grch38_annotations.vcf.gz hlhs_famA_grch38_annotations.vcf.gz.csi hlhs_famA_grch38_classifications.vcf.gz hlhs_famA_grch38_classifications.vcf.gz.csi hlhs_famA_grch38_famA_sample0_small_variants.vcf.gz hlhs_famA_grch38_famA_sample0_small_variants.vcf.gz.csi hlhs_famA_grch38_famA_sample0_sv.vcf.gz hlhs_famA_grch38_famA_sample0_sv.vcf.gz.csi hlhs_famA_grch38_famA_sample1_small_variants.vcf.gz hlhs_famA_grch38_famA_sample1_small_variants.vcf.gz.csi hlhs_famA_grch38_famA_sample1_sv.vcf.gz hlhs_famA_grch38_famA_sample1_sv.vcf.gz.csi hlhs_famA_grch38_famA_sample2_small_variants.vcf.gz hlhs_famA_grch38_famA_sample2_small_variants.vcf.gz.csi hlhs_famA_grch38_famA_sample2_sv.vcf.gz hlhs_famA_grch38_famA_sample2_sv.vcf.gz.csi","title":"Intermediates"},{"location":"usage/output/#other","text":"Besides the result files and intermediate files the following data is generated: .nextflow .nxf.home .nxf.log .nxf.tmp .nxf.work nxf_report.html nxf_timeline.html For details, see the Nextflow documentation .","title":"Other"},{"location":"usage/workflow/","text":"Workflow \u00b6 VIP consists of four workflows depending on the type of input data: fastq, bam/cram, gvcf or vcf. The fastq workflow is an extension of the cram workflow. The cram and gvcf workflows are extensions of the vcf workflow. The vcf workflow produces the pipeline outputs as described here . The following sections provide an overview of the steps of each of these workflows. FASTQ \u00b6 The fastq workflow consists of the following steps: Parallelize sample sheet per sample and for each sample Quality reporting and preprocessing using fastp Alignment using minimap2 producing a cram file per sample In case of multiple fastq files per sample, concatenate the cram output files Continue with step 3. of the cram workflow For details, see here . 
CRAM \u00b6 The cram workflow consists of the following steps: Parallelize sample sheet per sample and for each sample Create validated, indexed .bam file from bam/cram/sam input If a bed file was provide via the sample sheet: generate coverage metrics using MosDepth Discover short tandem repeats and publish as intermediate result. Using ExpansionHunter for Illumina short read data. Using this fork of Straglr for PacBio and Nanopore long read data, this is a fork of this fork(https://github.com/philres/straglr) and is chosen over the original Straglr because of the VCF output that enables VIP to combine it with the SV and SNV data in the VCF workflow. Discover copy number variants for for PacBio and Nanopore long read data using Spectre data and publish as intermediate result. Parallelize cram in chunks consisting of one or more contigs and for each chunk Perform short variant calling with DeepVariant producing a gvcf file per chunk per sample, the gvcfs of the samples in a project are than merged to one vcf per project (using GLnexus . Perform structural variant calling with Manta or cuteSV producing a vcf file per chunk per project. Concatenate short variant calling and structural variant calling vcf files per chunk per sample Continue with step 3. of the vcf workflow For details, see here . gVCF \u00b6 The gvcf workflow consists of the following steps: For each project in the sample sheet Create validated, indexed .g.vcf.gz file from bcf/bcf.gz/bcf.bgz/gvcf/gvcf.gz/gvcf.bgz/vcf/vcf.gz/vcf.bgz inputs Merge .g.vcf.gz files using GLnexus resulting in one vcf.gz per project Continue with step 3. of the vcf workflow For details, see here . 
VCF \u00b6 The vcf workflow consists of the following steps: For each project in the sample sheet Create validated, indexed .vcf.gz file from bcf|bcf.gz|bcf.bgz|vcf|vcf.gz|vcf.bgz input Chunk vcf.gz files and for each chunk Normalize Annotate Classify Filter Perform inheritance matching Classify in the context of samples Filter in the context of samples Concatenate chunks resulting in one vcf.gz file per project If cram data is available slice the cram files to only keep relevant reads Create report For details, see here .","title":"Workflow"},{"location":"usage/workflow/#workflow","text":"VIP consists of four workflows depending on the type of input data: fastq, bam/cram, gvcf or vcf. The fastq workflow is an extension of the cram workflow. The cram and gvcf workflows are extensions of the vcf workflow. The vcf workflow produces the pipeline outputs as described here . The following sections provide an overview of the steps of each of these workflows.","title":"Workflow"},{"location":"usage/workflow/#fastq","text":"The fastq workflow consists of the following steps: Parallelize sample sheet per sample and for each sample Quality reporting and preprocessing using fastp Alignment using minimap2 producing a cram file per sample In case of multiple fastq files per sample, concatenate the cram output files Continue with step 3. of the cram workflow For details, see here .","title":"FASTQ"},{"location":"usage/workflow/#cram","text":"The cram workflow consists of the following steps: Parallelize sample sheet per sample and for each sample Create validated, indexed .bam file from bam/cram/sam input If a bed file was provide via the sample sheet: generate coverage metrics using MosDepth Discover short tandem repeats and publish as intermediate result. Using ExpansionHunter for Illumina short read data. 
Using this fork of Straglr for PacBio and Nanopore long read data, this is a fork of this fork(https://github.com/philres/straglr) and is chosen over the original Straglr because of the VCF output that enables VIP to combine it with the SV and SNV data in the VCF workflow. Discover copy number variants for for PacBio and Nanopore long read data using Spectre data and publish as intermediate result. Parallelize cram in chunks consisting of one or more contigs and for each chunk Perform short variant calling with DeepVariant producing a gvcf file per chunk per sample, the gvcfs of the samples in a project are than merged to one vcf per project (using GLnexus . Perform structural variant calling with Manta or cuteSV producing a vcf file per chunk per project. Concatenate short variant calling and structural variant calling vcf files per chunk per sample Continue with step 3. of the vcf workflow For details, see here .","title":"CRAM"},{"location":"usage/workflow/#gvcf","text":"The gvcf workflow consists of the following steps: For each project in the sample sheet Create validated, indexed .g.vcf.gz file from bcf/bcf.gz/bcf.bgz/gvcf/gvcf.gz/gvcf.bgz/vcf/vcf.gz/vcf.bgz inputs Merge .g.vcf.gz files using GLnexus resulting in one vcf.gz per project Continue with step 3. 
of the vcf workflow For details, see here .","title":"gVCF"},{"location":"usage/workflow/#vcf","text":"The vcf workflow consists of the following steps: For each project in the sample sheet Create validated, indexed .vcf.gz file from bcf|bcf.gz|bcf.bgz|vcf|vcf.gz|vcf.bgz input Chunk vcf.gz files and for each chunk Normalize Annotate Classify Filter Perform inheritance matching Classify in the context of samples Filter in the context of samples Concatenate chunks resulting in one vcf.gz file per project If cram data is available slice the cram files to only keep relevant reads Create report For details, see here .","title":"VCF"}]} \ No newline at end of file +{"config":{"indexing":"full","lang":["en"],"min_search_length":3,"prebuild_index":false,"separator":"[\\s\\-]+"},"docs":[{"location":"","text":"Variant Interpretation Pipeline (VIP) \u00b6 VIP is a flexible human variant interpretation pipeline for rare disease using state-of-the-art pathogenicity prediction ( CAPICE ) and template-based interactive reporting to facilitate decision support. The VIP pipeline can be used starting from either your fastq , bam/cram or .g.vcf/vcf data, every entry point will result in a vcf file with your annotated, classified and filtered variants as well as a interactive HTML report with the same variants, prioritized by the CAPICE pathogenicity score and providing additional aids like a genome browser and a representation of the decisions leading to the VIP classification. VIP can be used for single patients, families or cohort data. 
Try it yourself Visit https://vip.molgeniscloud.org/ to analyse your own variants Tip Preprint now available at medRxiv Click here for a live example ] Above: report example Above: report example: genome browser","title":"Introduction"},{"location":"#variant-interpretation-pipeline-vip","text":"VIP is a flexible human variant interpretation pipeline for rare disease using state-of-the-art pathogenicity prediction ( CAPICE ) and template-based interactive reporting to facilitate decision support. The VIP pipeline can be used starting from either your fastq , bam/cram or .g.vcf/vcf data, every entry point will result in a vcf file with your annotated, classified and filtered variants as well as an interactive HTML report with the same variants, prioritized by the CAPICE pathogenicity score and providing additional aids like a genome browser and a representation of the decisions leading to the VIP classification. VIP can be used for single patients, families or cohort data. Try it yourself Visit https://vip.molgeniscloud.org/ to analyse your own variants Tip Preprint now available at medRxiv Click here for a live example ] Above: report example Above: report example: genome browser","title":"Variant Interpretation Pipeline (VIP)"},{"location":"about/acknowledgements/","text":"Acknowledgements \u00b6 Standing on the shoulders of giants. This project could not have been possible without the existence of many other tools and resources. Among them we would like to thank the people behind the following projects: CAPICE Ensembl Variant Effect Predictor (VEP) Nextflow AlphScore AnnotSV Illumina ExpansionHunter Illumina Manta Illumina SpliceAI igv.js DeepVariant Minimap2 GLnexus Samtools formats and tools Human Phenotype Ontology Consortium Clinical Genomic Database gnomAD ClinVar VKGL phyloP cuteSV Mosdepth Spectre Straglr Stranger fastp","title":"Acknowledgements"},{"location":"about/acknowledgements/#acknowledgements","text":"Standing on the shoulders of giants. 
This project could not have been possible without the existence of many other tools and resources. Among them we would like to thank the people behind the following projects: CAPICE Ensembl Variant Effect Predictor (VEP) Nextflow AlphScore AnnotSV Illumina ExpansionHunter Illumina Manta Illumina SpliceAI igv.js DeepVariant Minimap2 GLnexus Samtools formats and tools Human Phenotype Ontology Consortium Clinical Genomic Database gnomAD ClinVar VKGL phyloP cuteSV Mosdepth Spectre Straglr Stranger fastp","title":"Acknowledgements"},{"location":"about/license/","text":"License \u00b6 VIP is open source and available under the GNU Lesser General Public License v3.0 from https://github.com/molgenis/vip . See https://github.com/molgenis/vip/blob/main/LICENSE for details. GREEN-DB \u00b6 GREEN-DB is free to use for academic users, please refer to the attached LICENSE file here . If you are interested in using the Database commercially, please contact Oxford University Innovation Limited to negotiate a licence. Contact details are enquiries@innovation.ox.ac.uk quoting reference 18096. Relationship to other licences \u00b6 VIP is an aggregate work of many works, each covered by their own licence(s). For the purposes of determining what you can do with specific works in VIP, this policy should be read together with the licence(s) of the relevant tools. For the avoidance of doubt, where any other licence grants rights, this policy does not modify or reduce those rights under those licences.","title":"License"},{"location":"about/license/#license","text":"VIP is open source and available under the GNU Lesser General Public License v3.0 from https://github.com/molgenis/vip . See https://github.com/molgenis/vip/blob/main/LICENSE for details.","title":"License"},{"location":"about/license/#green-db","text":"GREEN-DB is free to use for academic users, please refer to the attached LICENSE file here . 
If you are interested in using the Database commercially, please contact Oxford University Innovation Limited to negotiate a licence. Contact details are enquiries@innovation.ox.ac.uk quoting reference 18096.","title":"GREEN-DB"},{"location":"about/license/#relationship-to-other-licences","text":"VIP is an aggregate work of many works, each covered by their own licence(s). For the purposes of determining what you can do with specific works in VIP, this policy should be read together with the licence(s) of the relevant tools. For the avoidance of doubt, where any other licence grants rights, this policy does not modify or reduce those rights under those licences.","title":"Relationship to other licences"},{"location":"advanced/annotations/","text":"Annotations \u00b6 VIP annotates variant effects and genotype data for samples using a rich set of tools. Annotations can be used to classify variants using classification trees and displayed in reports . Overview \u00b6 The table contains annotations available in most output files. Depending on the workflow and the configuration used additional annotations might be available, check the output file headers for the complete overview. Similarly, some annotations listed below might be missing from your output file depending on the sample sheet content and configuration. 
annotation type source description FORMAT/VI string list vip-inheritance-matcher An enumeration of possible inheritance modes (Possible values: AR, AR_C, AD, AD_IP, XLR, XLD) FORMAT/VIC string vip-inheritance-matcher Possible Compound heterozygote variants FORMAT/VID integer vip-inheritance-matcher De novo variant FORMAT/VIG string list vip-inheritance-matcher Genes with an inheritance match FORMAT/VIM integer vip-inheritance-matcher Inheritance Match: Genotypes, affected statuses and known gene inheritance patterns match FORMAT/VIPC_S string list vip-decision-tree VIP decision tree classification for sample FORMAT/VIPP_S string list vip-decision-tree VIP decision tree path for sample INFO/CSQ/Allele string VEP The variant allele used to calculate the consequence INFO/CSQ/ALLELE_NUM integer VEP Allele nr within the VCF file. INFO/CSQ/ALPHSCORE float VEP plugin AlphScore pathogenicity score for missense variants (see here ) INFO/CSQ/Amino_acids string VEP Reference and variant amino acids INFO/CSQ/ASV_ACMG_class string VEP plugin AnnotSv 'ACMG_class' output INFO/CSQ/ASV_AnnotSV_ranking_criteria string VEP plugin AnnotSv 'AnnotSV_ranking_criteria' output INFO/CSQ/ASV_AnnotSV_ranking_score string VEP plugin AnnotSv 'AnnotSV_ranking_score' output INFO/CSQ/BIOTYPE string VEP Biotype of transcript or regulatory feature INFO/CSQ/CAPICE_CL categorical VEP plugin CAPICE classification (see here ). Categories: B, LB, VUS, LP, P INFO/CSQ/CAPICE_SC float VEP plugin CAPICE score INFO/CSQ/cDNA_position string VEP Position within the cDNA INFO/CSQ/CDS_position string VEP Position within the coding sequence INFO/CSQ/CHECK_REF string VEP Reports variants where the input reference does not match the expected reference INFO/CSQ/CLIN_SIG string list VEP ClinVar classification(s) (do not use, see here ) INFO/CSQ/clinVar_CLNID integer list VEP plugin ClinVar variation identifier INFO/CSQ/clinVar_CLNREVSTAT categorical list VEP plugin ClinVar review status for the Variation ID. 
Categories: practice_guideline, reviewed_by_expert_panel, criteria_provided, _multiple_submitters, _no_conflicts, _single_submitter, _conflicting_interpretations, no_assertion_criteria_provided, no_assertion_provided INFO/CSQ/clinVar_CLNSIG string VEP plugin Clinical significance for this single variant; multiple values are separated by a vertical bar. Categories: Benign, Likely_benign, Uncertain_significance, Likely_pathogenic, Pathogenic, Conflicting_classifications_of_pathogenicity, Other INFO/CSQ/clinVar_CLNSIGINCL string VEP plugin Clinical significance for a haplotype or genotype that includes this variant. Reported as pairs of VariationID:clinical significance; multiple values are separated by a vertical bar. Categories: Benign, Likely_benign, Uncertain_significance, Likely_pathogenic, Pathogenic, Conflicting_interpretations_of_pathogenicity INFO/CSQ/Codons string VEP Reference and variant codon sequence INFO/CSQ/Consequence string list VEP Effect(s) described as Sequence Ontology term(s) INFO/CSQ/DISTANCE string VEP Shortest distance from variant to transcript INFO/CSQ/existing_InFrame_oORFs string VEP plugin The number of existing inFrame overlapping ORFs (inFrame oORF) at the 5 prime UTR INFO/CSQ/existing_OutOfFrame_oORFs string VEP plugin The number of existing out-of-frame overlapping ORFs (OutOfFrame oORF) at the 5 prime UTR INFO/CSQ/existing_uORFs string VEP plugin The number of existing uORFs with a stop codon within the 5 prime UTR INFO/CSQ/Existing_variation string list VEP Identifier(s) of co-located known variants INFO/CSQ/EXON string VEP The exon number (out of total number) INFO/CSQ/Feature string VEP Ensembl stable ID of feature INFO/CSQ/Feature_type categorical VEP VEP feature type. 
Categories: Transcript, RegulatoryFeature, MotifFeature INFO/CSQ/FATHMM_MKL_NC float VEP plugin The FATHMM-MKL score for Non-Coding Single Nucleotide Variants (SNVs) INFO/CSQ/five_prime_UTR_variant_annotation string VEP plugin Output the annotation of a given 5 prime UTR variant INFO/CSQ/five_prime_UTR_variant_consequence string VEP plugin Output the variant consequences of a given 5 prime UTR variant: uAUG_gained, uAUG_lost, uSTOP_lost or uFrameshift INFO/CSQ/FLAGS string list VEP Transcript quality flags (cds_start_NF: CDS 5' incomplete, cds_end_NF: CDS 3' incomplete) INFO/CSQ/GADO_PD categorical VEP plugin GADO prediction for the relation between the HPO terms of the proband(s) and the gene, HC: high confidence, LC: low confidence. Categories: LC, HC INFO/CSQ/GADO_SC float VEP plugin The combined prioritization GADO Z-score over the HPO of the proband(s) terms for this case INFO/CSQ/Gene string VEP Ensembl stable ID of affected gene INFO/CSQ/gnomAD_COV float VEP plugin gnomAD coverage (percent of individuals in gnomAD source) INFO/CSQ/gnomAD_AF float VEP plugin gnomAD allele frequency INFO/CSQ/gnomAD_FAF95 float VEP plugin gnomAD filter allele frequency (95% confidence) INFO/CSQ/gnomAD_FAF99 float VEP plugin gnomAD filter allele frequency (99% confidence) INFO/CSQ/gnomAD_HN integer VEP plugin gnomAD number of homozygotes INFO/CSQ/gnomAD_QC string list VEP plugin gnomAD quality control filters that failed INFO/CSQ/gnomAD_SRC categorical VEP plugin gnomAD source (E=exomes, G=genomes, T=total) INFO/CSQ/Grantham string VEP plugin Grantham Matrix score - Grantham, R. 
Amino Acid Difference Formula to Help Explain Protein Evolution, Science 1974 Sep 6;185(4154):862-4 INFO/CSQ/HGNC_ID integer VEP HGNC gene identifier INFO/CSQ/HGVS_OFFSET string VEP Indicates by how many bases the HGVS notations for this variant have been shifted INFO/CSQ/HGVSc string VEP HGVS nomenclature: coding DNA reference sequence INFO/CSQ/HGVSp string VEP HGVS nomenclature: protein reference sequence INFO/CSQ/HIGH_INF_POS string VEP A flag indicating if the variant falls in a high information position of a transcription factor binding profile (TFBP) INFO/CSQ/HPO string list VEP plugin Human phenotype ontology term that match INFO/CSQ/IMPACT categorical VEP Impact as predicted by VEP. Categories: LOW, MODERATE, HIGH, MODIFIER INFO/CSQ/IncompletePenetrance string VEP plugin Boolean indicating if the gene is known for incomplete penetrance (1:true) INFO/CSQ/InheritanceModesGene string list VEP plugin List of inheritance modes for the gene INFO/CSQ/INTRON string VEP The intron number (out of total number) INFO/CSQ/MOTIF_NAME string VEP The source and identifier of a transcription factor binding profile aligned at this position INFO/CSQ/MOTIF_POS string VEP The relative position of the variation in the aligned TFBP INFO/CSQ/MOTIF_SCORE_CHANGE string VEP The difference in motif score of the reference and variant sequences for the TFBP INFO/CSQ/ncER float VEP plugin The non-coding essential regulation (ncER) score indicates if a region is likely to be essential in terms of regulation. 
INFO/CSQ/PHENO integer list VEP Indicates if existing variant is associated with a phenotype, disease or trait; multiple values correspond to multiple values in the Existing_variation field INFO/CSQ/phyloP string VEP custom Conservation p-values, see here INFO/CSQ/PICK integer VEP Boolean indicating if this is the VEP picked transcript INFO/CSQ/PolyPhen float VEP PolyPhen score INFO/CSQ/Protein_position string VEP Position within the protein INFO/CSQ/PUBMED integer list VEP PubMed citations INFO/CSQ/REFSEQ_MATCH string VEP Flag indicating whether and how the RefSeq model differs from the underlying genome INFO/CSQ/REFSEQ_OFFSET string VEP ? INFO/CSQ/ReMM float VEP plugin The Regulatory Mendelian Mutation (ReMM) score was created for relevance prediction of non-coding variations in the human genome in terms of Mendelian diseases. INFO/CSQ/SIFT float VEP SIFT score INFO/CSQ/SOMATIC integer list VEP Somatic status of existing variant(s); multiple values correspond to multiple values in the Existing_variation field INFO/CSQ/SOURCE string VEP ? INFO/CSQ/SpliceAI_pred_DP_AG float VEP plugin SpliceAI predicted effect on splicing. Delta position for acceptor gain INFO/CSQ/SpliceAI_pred_DP_AL float VEP plugin SpliceAI predicted effect on splicing. Delta position for acceptor loss INFO/CSQ/SpliceAI_pred_DP_DG float VEP plugin SpliceAI predicted effect on splicing. Delta position for donor gain INFO/CSQ/SpliceAI_pred_DP_DL float VEP plugin SpliceAI predicted effect on splicing. Delta position for donor loss INFO/CSQ/SpliceAI_pred_DS_AG float VEP plugin SpliceAI predicted effect on splicing. Delta score for acceptor gain INFO/CSQ/SpliceAI_pred_DS_AL float VEP plugin SpliceAI predicted effect on splicing. Delta score for acceptor loss INFO/CSQ/SpliceAI_pred_DS_DG float VEP plugin SpliceAI predicted effect on splicing. Delta score for donor gain INFO/CSQ/SpliceAI_pred_DS_DL float VEP plugin SpliceAI predicted effect on splicing. 
Delta score for donor loss INFO/CSQ/SpliceAI_pred_SYMBOL string VEP plugin SpliceAI gene symbol INFO/CSQ/STRAND string VEP The DNA strand (1 or -1) on which the transcript/feature lies INFO/CSQ/SYMBOL string VEP Gene symbol INFO/CSQ/SYMBOL_SOURCE string VEP The source of the gene symbol INFO/CSQ/TRANSCRIPTION_FACTORS string VEP ? INFO/CSQ/VIPC string vip-decision-tree VIP decision tree classification for variant effect INFO/CSQ/VIPP string list vip-decision-tree VIP decision tree path for variant effect INFO/CSQ/VKGL string VEP plugin ? INFO/CSQ/VKGL_CL string VEP plugin VKGL consensus variant classification Details \u00b6 VIP uses the Ensembl Variant Effect Predictor (VEP) to annotate all variants with their consequences. We use VEP with the refseq option for the transcripts, and with the flags for sift and polyphen annotations enabled. Plugins \u00b6 Below we describe the other sources which we annotate using the VEP plugin framework. CAPICE \u00b6 CAPICE is a computational method for predicting the pathogenicity of SNVs and InDels. It is a gradient boosting tree model trained using a variety of genomic annotations used by CADD score and trained on the clinical significance. CAPICE performs consistently across diverse independent synthetic, and real clinical data sets. It outperforms the current best method in pathogenicity estimation for variants of different molecular consequences and allele frequency. We run the CAPICE application in the VIP pipeline and use a VEP plugin to annotate the VEP output with the scores from the CAPICE output file. VKGL \u00b6 The datashare workgroup of VKGL has set up a central database to enable mutual sharing of variant classifications through a partly automatic process. An additional goal is the public sharing of these data. The currently publicly available part of the database consists of DNA variant classifications established based on (former) diagnostic questions. 
We add the classifications from an export of the database and use a VEP plugin to annotate the VEP output with the classifications from this file. SpliceAI \u00b6 SpliceAI is an open-source deep learning splicing prediction algorithm that has demonstrated in the past few years its high ability to predict splicing defects caused by DNA variations. We add the scores from the available precomputed scores of SpliceAI and use a copy of the available VEP plugin to annotate the VEP output with the classifications from this file. AnnotSV \u00b6 AnnotSV is a program for annotating and ranking structural variations from genomes of several organisms. We run the AnnotSV application in the VIP pipeline and use a VEP plugin to annotate the VEP output with the scores from the AnnotSV output file. HPO \u00b6 A file based on the HPO phenotype_to_genes.txt is used to annotate VEP consequences with the inheritance modes associated with the gene of this consequence. Inheritance \u00b6 A file based on the CGD database is used to annotate VEP consequences with the inheritance modes associated with the gene of this consequence. Grantham \u00b6 The Grantham score attempts to predict the distance between two amino acids, in an evolutionary sense. A lower Grantham score reflects less evolutionary distance. A higher Grantham score reflects a greater evolutionary distance. We use a copy of the VEP plugin by Duarte Molha to annotate the VEP output with Grantham scores. GADO \u00b6 GADO can be used to prioritize genes based on the HPO terms of a patient. We run the GADO commandline application in the VIP pipeline and use a VEP plugin to annotate the VEP output with the scores from the GADO output file. AlphScore \u00b6 AlphScore is a method to predict the pathogenicity of missense variants using features derived from AlphaFold2. We add the available precomputed scores of AlphScore using a custom VEP plugin. 
ncER \u00b6 The non-coding essential regulation (ncER) score indicates if a region is likely to be essential in terms of regulation. The ncER file VIP uses is the version provided by GREEN-VARAN (https://github.com/edg1983/GREEN-VARAN) on Zenodo: https://zenodo.org/records/5636163 ReMM \u00b6 The Regulatory Mendelian Mutation (ReMM) score was created for relevance prediction of non-coding variations (SNVs and small InDels) in the human genome (hg19) in terms of Mendelian diseases. The VEP plugin is built on top of the GREEN-DB dataset (GRCh38) for ReMM scores: https://zenodo.org/records/3955933 FATHMM-MKL \u00b6 FATHMM-MKL predicts the Functional Consequences of Coding and Non-Coding Single Nucleotide Variants (SNVs) This plugin annotates non-coding scores only, and is built on top of the GREEN-DB dataset (GRCh38) for FATHMM-MKL non coding scores: https://zenodo.org/records/3981121 GREEN-DB constraint scores \u00b6 GREEN-DB GREEN-DB is a comprehensive collection of 2.4 million regulatory elements in the human genome collected from previously published databases, high-throughput screenings and functional studies. This plugin annotates the constraint scores only, and is built on top of the GREEN-DB bed files ( GRCh38): https://zenodo.org/records/5636209 GREEN-DB constraint scores are annotated per region type: enhancers, promoters, bivalent, insulators, silencers. If multiple regions of the same type overlap, VIP annotates the highest constraint score.","title":"Annotations"},{"location":"advanced/annotations/#annotations","text":"VIP annotates variant effects and genotype data for samples using a rich set of tools. Annotations can be used to classify variants using classification trees and displayed in reports .","title":"Annotations"},{"location":"advanced/annotations/#overview","text":"The table contains annotations available in most output files. 
Depending on the workflow and the configuration used additional annotations might be available, check the output file headers for the complete overview. Similarly, some annotations listed below might be missing from your output file depending on the sample sheet content and configuration. annotation type source description FORMAT/VI string list vip-inheritance-matcher An enumeration of possible inheritance modes (Possible values: AR, AR_C, AD, AD_IP, XLR, XLD) FORMAT/VIC string vip-inheritance-matcher Possible Compound hetrozygote variants FORMAT/VID integer vip-inheritance-matcher De novo variant FORMAT/VIG string list vip-inheritance-matcher Genes with an inheritance match FORMAT/VIM integer vip-inheritance-matcher Inheritance Match: Genotypes, affected statuses and known gene inheritance patterns match FORMAT/VIPC_S string list vip-decision-tree VIP decision tree classification for sample FORMAT/VIPP_S string list vip-decision-tree VIP decision tree path for sample INFO/CSQ/Allele string VEP The variant allele used to calculate the consequence INFO/CSQ/ALLELE_NUM integer VEP Allele nr within the VCF file. INFO/CSQ/ALPHSCORE float VEP plugin AlphScore pathogenicity score for missense variants (see here ) INFO/CSQ/Amino_acids string VEP Reference and variant amino acids INFO/CSQ/ASV_ACMG_class string VEP plugin AnnotSv 'ACMG_class' output INFO/CSQ/ASV_AnnotSV_ranking_criteria string VEP plugin AnnotSv 'AnnotSV_ranking_criteria' output INFO/CSQ/ASV_AnnotSV_ranking_score string VEP plugin AnnotSv 'AnnotSV_ranking_score' output INFO/CSQ/BIOTYPE string VEP Biotype of transcript or regulatory feature INFO/CSQ/CAPICE_CL categorical VEP plugin CAPICE classification (see here ). 
Categories: B, LB, VUS, LP, P INFO/CSQ/CAPICE_SC float VEP plugin CAPICE score INFO/CSQ/cDNA_position string VEP Position within the cDNA INFO/CSQ/CDS_position string VEP Position within the coding sequence INFO/CSQ/CHECK_REF string VEP Reports variants where the input reference does not match the expected reference INFO/CSQ/CLIN_SIG string list VEP ClinVar classification(s) (do not use, see here ) INFO/CSQ/clinVar_CLNID integer list VEP plugin ClinVar variation identifier INFO/CSQ/clinVar_CLNREVSTAT categorical list VEP plugin ClinVar review status for the Variation ID. Categories: practice_guideline, reviewed_by_expert_panel, criteria_provided, _multiple_submitters, _no_conflicts, _single_submitter, _conflicting_interpretations, no_assertion_criteria_provided, no_assertion_provided INFO/CSQ/clinVar_CLNSIG string VEP plugin Clinical significance for this single variant; multiple values are separated by a vertical bar. Categories: Benign, Likely_benign, Uncertain_significance, Likely_pathogenic, Pathogenic, Conflicting_classifications_of_pathogenicity, Other INFO/CSQ/clinVar_CLNSIGINCL string VEP plugin Clinical significance for a haplotype or genotype that includes this variant. Reported as pairs of VariationID:clinical significance; multiple values are separated by a vertical bar. 
Categories: Benign, Likely_benign, Uncertain_significance, Likely_pathogenic, Pathogenic, Conflicting_interpretations_of_pathogenicity INFO/CSQ/Codons string VEP Reference and variant codon sequence INFO/CSQ/Consequence string list VEP Effect(s) described as Sequence Ontology term(s) INFO/CSQ/DISTANCE string VEP Shortest distance from variant to transcript INFO/CSQ/existing_InFrame_oORFs string VEP plugin The number of existing inFrame overlapping ORFs (inFrame oORF) at the 5 prime UTR INFO/CSQ/existing_OutOfFrame_oORFs string VEP plugin The number of existing out-of-frame overlapping ORFs (OutOfFrame oORF) at the 5 prime UTR INFO/CSQ/existing_uORFs string VEP plugin The number of existing uORFs with a stop codon within the 5 prime UTR INFO/CSQ/Existing_variation string list VEP Identifier(s) of co-located known variants INFO/CSQ/EXON string VEP The exon number (out of total number) INFO/CSQ/Feature string VEP Ensembl stable ID of feature INFO/CSQ/Feature_type categorical VEP VEP feature type. Categories: Transcript, RegulatoryFeature, MotifFeature INFO/CSQ/FATHMM_MKL_NC float VEP plugin The FATHMM-MKL score for Non-Coding Single Nucleotide Variants (SNVs) INFO/CSQ/five_prime_UTR_variant_annotation string VEP plugin Output the annotation of a given 5 prime UTR variant INFO/CSQ/five_prime_UTR_variant_consequence string VEP plugin Output the variant consequences of a given 5 prime UTR variant: uAUG_gained, uAUG_lost, uSTOP_lost or uFrameshift INFO/CSQ/FLAGS string list VEP Transcript quality flags (cds_start_NF: CDS 5' incomplete, cds_end_NF: CDS 3' incomplete) INFO/CSQ/GADO_PD categorical VEP plugin GADO prediction for the relation between the HPO terms of the proband(s) and the gene, HC: high confidence, LC: low confidence. 
Categories: LC, HC INFO/CSQ/GADO_SC float VEP plugin The combined prioritization GADO Z-score over the HPO of the proband(s) terms for this case INFO/CSQ/Gene string VEP Ensembl stable ID of affected gene INFO/CSQ/gnomAD_COV float VEP plugin gnomAD coverage (percent of individuals in gnomAD source) INFO/CSQ/gnomAD_AF float VEP plugin gnomAD allele frequency INFO/CSQ/gnomAD_FAF95 float VEP plugin gnomAD filter allele frequency (95% confidence) INFO/CSQ/gnomAD_FAF99 float VEP plugin gnomAD filter allele frequency (99% confidence) INFO/CSQ/gnomAD_HN integer VEP plugin gnomAD number of homozygotes INFO/CSQ/gnomAD_QC string list VEP plugin gnomAD quality control filters that failed INFO/CSQ/gnomAD_SRC categorical VEP plugin gnomAD source (E=exomes, G=genomes, T=total) INFO/CSQ/Grantham string VEP plugin Grantham Matrix score - Grantham, R. Amino Acid Difference Formula to Help Explain Protein Evolution, Science 1974 Sep 6;185(4154):862-4 INFO/CSQ/HGNC_ID integer VEP HGNC gene identifier INFO/CSQ/HGVS_OFFSET string VEP Indicates by how many bases the HGVS notations for this variant have been shifted INFO/CSQ/HGVSc string VEP HGVS nomenclature: coding DNA reference sequence INFO/CSQ/HGVSp string VEP HGVS nomenclature: protein reference sequence INFO/CSQ/HIGH_INF_POS string VEP A flag indicating if the variant falls in a high information position of a transcription factor binding profile (TFBP) INFO/CSQ/HPO string list VEP plugin Human phenotype ontology term that match INFO/CSQ/IMPACT categorical VEP Impact as predicted by VEP. 
Categories: LOW, MODERATE, HIGH, MODIFIER INFO/CSQ/IncompletePenetrance string VEP plugin Boolean indicating if the gene is known for incomplete penetrance (1:true) INFO/CSQ/InheritanceModesGene string list VEP plugin List of inheritance modes for the gene INFO/CSQ/INTRON string VEP The intron number (out of total number) INFO/CSQ/MOTIF_NAME string VEP The source and identifier of a transcription factor binding profile aligned at this position INFO/CSQ/MOTIF_POS string VEP The relative position of the variation in the aligned TFBP INFO/CSQ/MOTIF_SCORE_CHANGE string VEP The difference in motif score of the reference and variant sequences for the TFBP INFO/CSQ/ncER float VEP plugin The non-coding essential regulation (ncER) score indicates if a region is likely to be essential in terms of regulation. INFO/CSQ/PHENO integer list VEP Indicates if existing variant is associated with a phenotype, disease or trait; multiple values correspond to multiple values in the Existing_variation field INFO/CSQ/phyloP string VEP custom Conservation p-values, see here INFO/CSQ/PICK integer VEP Boolean indicating if this is the VEP picked transcript INFO/CSQ/PolyPhen float VEP PolyPhen score INFO/CSQ/Protein_position string VEP Position within the protein INFO/CSQ/PUBMED integer list VEP PubMed citations INFO/CSQ/REFSEQ_MATCH string VEP Flag indicating whether and how the RefSeq model differs from the underlying genome INFO/CSQ/REFSEQ_OFFSET string VEP ? INFO/CSQ/ReMM float VEP plugin The Regulatory Mendelian Mutation (ReMM) score was created for relevance prediction of non-coding variations in the human genome in terms of Mendelian diseases. INFO/CSQ/SIFT float VEP SIFT score INFO/CSQ/SOMATIC integer list VEP Somatic status of existing variant(s); multiple values correspond to multiple values in the Existing_variation field INFO/CSQ/SOURCE string VEP ? INFO/CSQ/SpliceAI_pred_DP_AG float VEP plugin SpliceAI predicted effect on splicing. 
Delta position for acceptor gain INFO/CSQ/SpliceAI_pred_DP_AL float VEP plugin SpliceAI predicted effect on splicing. Delta position for acceptor loss INFO/CSQ/SpliceAI_pred_DP_DG float VEP plugin SpliceAI predicted effect on splicing. Delta position for donor gain INFO/CSQ/SpliceAI_pred_DP_DL float VEP plugin SpliceAI predicted effect on splicing. Delta position for donor loss INFO/CSQ/SpliceAI_pred_DS_AG float VEP plugin SpliceAI predicted effect on splicing. Delta score for acceptor gain INFO/CSQ/SpliceAI_pred_DS_AL float VEP plugin SpliceAI predicted effect on splicing. Delta score for acceptor loss INFO/CSQ/SpliceAI_pred_DS_DG float VEP plugin SpliceAI predicted effect on splicing. Delta score for donor gain INFO/CSQ/SpliceAI_pred_DS_DL float VEP plugin SpliceAI predicted effect on splicing. Delta score for donor loss INFO/CSQ/SpliceAI_pred_SYMBOL string VEP plugin SpliceAI gene symbol INFO/CSQ/STRAND string VEP The DNA strand (1 or -1) on which the transcript/feature lies INFO/CSQ/SYMBOL string VEP Gene symbol INFO/CSQ/SYMBOL_SOURCE string VEP The source of the gene symbol INFO/CSQ/TRANSCRIPTION_FACTORS string VEP ? INFO/CSQ/VIPC string vip-decision-tree VIP decision tree classification for variant effect INFO/CSQ/VIPP string list vip-decision-tree VIP decision tree path for variant effect INFO/CSQ/VKGL string VEP plugin ? INFO/CSQ/VKGL_CL string VEP plugin VKGL consensus variant classification","title":"Overview"},{"location":"advanced/annotations/#details","text":"VIP uses the Ensemble Effect Predictor to annotate all variants with their consequences. 
We use VEP with the refseq option for the transcripts, and with the flags for sift and polyphen annotations enabled.","title":"Details"},{"location":"advanced/annotations/#plugins","text":"Below we describe the other sources which we annotate using the VEP plugin framework.","title":"Plugins"},{"location":"advanced/annotations/#capice","text":"CAPICE is a computational method for predicting the pathogenicity of SNVs and InDels. It is a gradient boosting tree model trained using a variety of genomic annotations used by CADD score and trained on the clinical significance. CAPICE performs consistently across diverse independent synthetic, and real clinical data sets. It outperforms the current best method in pathogenicity estimation for variants of different molecular consequences and allele frequency. We run the CAPICE application in the VIP pipeline and use a VEP plugin to annotate the VEP output with the scores from the CAPICE output file.","title":"CAPICE"},{"location":"advanced/annotations/#vkgl","text":"The datashare workgroup of VKGL has set up a central database to enable mutual sharing of variant classifications through a partly automatic process. An additional goal is the public sharing of these data. The currently publicly available part of the database consists of DNA variant classifications established based on (former) diagnostic questions. We add the classifications from an export of the database and use a VEP plugin to annotate the VEP output with the classifications from this file.","title":"VKGL"},{"location":"advanced/annotations/#spliceai","text":"SpliceAI is an open-source deep learning splicing prediction algorithm that has demonstrated in the past few years its high ability to predict splicing defects caused by DNA variations. 
We add the scores from the available precomputed scores of SpliceAI and use a copy of the available VEP plugin to annotate the VEP output with the classifications from this file.","title":"SpliceAI"},{"location":"advanced/annotations/#annotsv","text":"AnnotSV is a program for annotating and ranking structural variations from genomes of several organisms. We run the AnnotSV application in the VIP pipeline and use a VEP plugin to annotate the VEP output with the scores from the AnnotSV output file.","title":"AnnotSV"},{"location":"advanced/annotations/#hpo","text":"A file based on the HPO phenotype_to_genes.txt is used to annotate VEP consequences with the inheritance modes associated with the gene of this consequence.","title":"HPO"},{"location":"advanced/annotations/#inheritance","text":"A file based on the CGD database is used to annotate VEP consequences with the inheritance modes associated with the gene of this consequence.","title":"Inheritance"},{"location":"advanced/annotations/#grantham","text":"The Grantham score attempts to predict the distance between two amino acids, in an evolutionary sense. A lower Grantham score reflects less evolutionary distance. A higher Grantham score reflects a greater evolutionary distance. We use a copy of the VEP plugin by Duarte Molha to annotate the VEP output with Grantham scores.","title":"Grantham"},{"location":"advanced/annotations/#gado","text":"GADO can be used to prioritize genes based on the HPO terms of a patient. We run the GADO commandline application in the VIP pipeline and use a VEP plugin to annotate the VEP output with the scores from the GADO output file.","title":"GADO"},{"location":"advanced/annotations/#alphscore","text":"AlphScore is a method to predict the pathogenicity of missense variants using features derived from AlphaFold2. 
We add the available precomputed scores of AlphScore using a custom VEP plugin.","title":"AlphScore"},{"location":"advanced/annotations/#ncer","text":"The non-coding essential regulation (ncER) score indicates if a region is likely to be essential in terms of regulation. The ncER file VIP uses is the version provided by GREEN-VARAN (https://github.com/edg1983/GREEN-VARAN) on Zenodo: https://zenodo.org/records/5636163","title":"ncER"},{"location":"advanced/annotations/#remm","text":"The Regulatory Mendelian Mutation (ReMM) score was created for relevance prediction of non-coding variations (SNVs and small InDels) in the human genome (hg19) in terms of Mendelian diseases. The VEP plugin is built on top of the GREEN-DB dataset (GRCh38) for ReMM scores: https://zenodo.org/records/3955933","title":"ReMM"},{"location":"advanced/annotations/#fathmm-mkl","text":"FATHMM-MKL predicts the Functional Consequences of Coding and Non-Coding Single Nucleotide Variants (SNVs). This plugin annotates non-coding scores only, and is built on top of the GREEN-DB dataset (GRCh38) for FATHMM-MKL non-coding scores: https://zenodo.org/records/3981121","title":"FATHMM-MKL"},{"location":"advanced/annotations/#green-db-constraint-scores","text":"GREEN-DB GREEN-DB is a comprehensive collection of 2.4 million regulatory elements in the human genome collected from previously published databases, high-throughput screenings and functional studies. This plugin annotates the constraint scores only, and is built on top of the GREEN-DB bed files (GRCh38): https://zenodo.org/records/5636209 GREEN-DB constraint scores are annotated per region type: enhancers, promoters, bivalent, insulators, silencers. 
If multiple regions of the same type overlap, VIP annotates the highest constraint score.","title":"GREEN-DB constraint scores"},{"location":"advanced/classification_trees/","text":"Classification trees \u00b6 In order to end up with a small list of candidate variant records for interpretation VIP performs variant filtration by: Classify all variant-consequences based on variant annotations Remove variant-consequences based on their classes Annotate remaining variant records using inheritance matcher Classify all variant-consequences based on variant annotations in the context of samples Remove variant-consequences based on their classes. Remove variants that had all their variant-consequences removed The following sections describe the default variant filtration strategies and how to customize classification and filtration. Default \u00b6 VIP contains default filtration strategies for variant-consequences as well as variant-consequences in the context of samples. Variant-consequences \u00b6 The default decision tree to classify variant-consequences works as follows: Each variant-consequence is classified as Benign , Likely Benign , VUS , Likely Pathogenic , Pathogenic or Remove Variant-consequences classified as Benign , Likely Benign and Remove are removed by default. Above: default GRCh38 variant classification tree Variant-consequences (samples) \u00b6 The default decision tree to classify variant-consequences in the context of samples works as follows: Each variant-consequence-sample is classified as U1 (usable: probably), U2 (usable: maybe), U3 (usable: probably not) and U4 (usable: only in cases of suspected incomplete penetrance). Variant-consequences classified as U3 and U4 for all samples are removed by default. 
Above: default variant sample classification tree Customization \u00b6 The default variant filtration strategy can be customized using the following parameters (see here ): vcf.classify.GRCh38.decision_tree vcf.filter.classes vcf.classify_samples.GRCh38.decision_tree vcf.filter_samples.classes The following repositories might be of interest when creating a new decision tree: vip vip-decision-tree You are free to use your own set of classes in your decision tree. Keep in mind to update the filter classes parameters accordingly.","title":"Classification trees"},{"location":"advanced/classification_trees/#classification-trees","text":"In order to end up with a small list of candidate variant records for interpretation VIP performs variant filtration by: Classify all variant-consequences based on variant annotations Remove variant-consequences based on their classes Annotate remaining variant records using inheritance matcher Classify all variant-consequences based on variant annotations in the context of samples Remove variant-consequences based on their classes. Remove variants that had all their variant-consequences removed The following sections describe the default variant filtration strategies and how to customize classification and filtration.","title":"Classification trees"},{"location":"advanced/classification_trees/#default","text":"VIP contains default filtration strategies for variant-consequences as well as variant-consequences in the context of samples.","title":"Default"},{"location":"advanced/classification_trees/#variant-consequences","text":"The default decision tree to classify variant-consequences works as follows: Each variant-consequence is classified as Benign , Likely Benign , VUS , Likely Pathogenic , Pathogenic or Remove Variant-consequences classified as Benign , Likely Benign and Remove are removed by default. 
Above: default GRCh38 variant classification tree","title":"Variant-consequences"},{"location":"advanced/classification_trees/#variant-consequences-samples","text":"The default decision tree to classify variant-consequences in the context of samples works as follows: Each variant-consequence-sample is classified as U1 (usable: probably), U2 (usable: maybe), U3 (usable: probably not) and U4 (usable: only in cases of suspected incomplete penetrance). Variant-consequences classified as U3 and U4 for all samples are removed by default. Above: default variant sample classification tree","title":"Variant-consequences (samples)"},{"location":"advanced/classification_trees/#customization","text":"The default variant filtration strategy can be customized using the following parameters (see here ): vcf.classify.GRCh38.decision_tree vcf.filter.classes vcf.classify_samples.GRCh38.decision_tree vcf.filter_samples.classes The following repositories might be of interest when creating a new decision tree: vip vip-decision-tree You are free to use your own set of classes in your decision tree. Keep in mind to update the filter classes parameters accordingly.","title":"Customization"},{"location":"advanced/report_templates/","text":"Report templates \u00b6 VIP outputs a standalone HTML report that can be viewed in any modern browser. The report is based on the input sample sheet information and the output variant vcf data. Default \u00b6 As a default VIP uses a report template that is suitable for most analyses: Above: default report template Customization \u00b6 Using the vcf.report.template parameter (see here ) it is possible to specify a different report template to create reports tailored to your needs. 
The following repositories might be of interest when creating a new report template: vip-report-api vip-report-template vip-report-vcf vite-plugin-inline The vip-report tool creates reports based on a report template as described in the following repositories: vip-report vip-utils Configuration \u00b6 A configuration .json file can be used in combination with a vcf.report.template to create reports that e.g. show specific variant content or variant filters. The allowed contents of a configuration .json file depend on the vcf.report.template used. For the default vcf.report.template the configuration options are described here .","title":"Report templates"},{"location":"advanced/report_templates/#report-templates","text":"VIP outputs a standalone HTML report that can be viewed in any modern browser. The report is based on the input sample sheet information and the output variant vcf data.","title":"Report templates"},{"location":"advanced/report_templates/#default","text":"As a default VIP uses a report template that is suitable for most analyses: Above: default report template","title":"Default"},{"location":"advanced/report_templates/#customization","text":"Using the vcf.report.template parameter (see here ) it is possible to specify a different report template to create reports tailored to your needs. The following repositories might be of interest when creating a new report template: vip-report-api vip-report-template vip-report-vcf vite-plugin-inline The vip-report tool creates reports based on a report template as described in the following repositories: vip-report vip-utils","title":"Customization"},{"location":"advanced/report_templates/#configuration","text":"A configuration .json file can be used in combination with a vcf.report.template to create reports that e.g. show specific variant content or variant filters. The allowed contents of a configuration .json file depend on the vcf.report.template used. 
For the default vcf.report.template the configuration options are described here .","title":"Configuration"},{"location":"examples/multi-project/","text":"Multi-project \u00b6 VIP can be used to analyse different projects in one run, producing output files per project. To achieve this you just need to specify different projects in one samplesheet. family_id individual_id paternal_id maternal_id sex affected proband sequencing_platform fastq fastq_r1 fastq_r2 vip0 fam0 individual0 individual1 male true true nanopore path/to/vip0.fastq.gz vip0 fam0 individual1 female false false nanopore path/to/vip1.fastq.gz vip1 fam1 individual2 individual3 individual4 male false false pacbio_hifi path/to/vip2.fastq.gz vip1 fam1 individual3 male false false pacbio_hifi path/to/vip3.fastq.gz vip1 fam1 individual4 female false true pacbio_hifi path/to/vip4.fastq.gz vip2 fam2 individual5 male true true illumina /vip5_1.fastq.gz /vip5_2.fastq.gz Run the pipeline \u00b6 cd vip vip --workflow fastq --input path/to/samplesheet.tsv --output path/to/output/folder For a working example on how to generate output for multiple projects see here .","title":"Multi-project"},{"location":"examples/multi-project/#multi-project","text":"VIP can be used to analyse different projects in one run, producing output files per project. To achieve this you just need to specify different projects in one samplesheet. 
family_id individual_id paternal_id maternal_id sex affected proband sequencing_platform fastq fastq_r1 fastq_r2 vip0 fam0 individual0 individual1 male true true nanopore path/to/vip0.fastq.gz vip0 fam0 individual1 female false false nanopore path/to/vip1.fastq.gz vip1 fam1 individual2 individual3 individual4 male false false pacbio_hifi path/to/vip2.fastq.gz vip1 fam1 individual3 male false false pacbio_hifi path/to/vip3.fastq.gz vip1 fam1 individual4 female false true pacbio_hifi path/to/vip4.fastq.gz vip2 fam2 individual5 male true true illumina /vip5_1.fastq.gz /vip5_2.fastq.gz","title":"Multi-project"},{"location":"examples/multi-project/#run-the-pipeline","text":"cd vip vip --workflow fastq --input path/to/samplesheet.tsv --output path/to/output/folder For a working example on how to generate output for multiple projects see here .","title":"Run the pipeline"},{"location":"examples/nanopore/","text":"Nanopore \u00b6 To run vip with nanopore data, just specify nanopore as the sequencing_platform in your sample sheet. The other options for this field are \"illumina\" and \"pacbio_hifi\" and can be used in a similar manner. Samplesheet \u00b6 See an example for the samplesheet below, the example shows the samplesheet for a run starting from the cram, but the 'sequencing_platform' can also be used to achieve the same for a run with the fastq workflow. individual_id sequencing_platform cram your_sample_id nanopore path/to/your/nanopore.cram Run the pipeline \u00b6 cd vip vip --workflow cram --input path/to/samplesheet.tsv --output path/to/output/folder For an example on how to generate output for FASTQ files using the Oxford Nanopore platform see here .","title":"Nanopore"},{"location":"examples/nanopore/#nanopore","text":"To run vip with nanopore data, just specify nanopore as the sequencing_platform in your sample sheet. 
The other options for this field are \"illumina\" and \"pacbio_hifi\" and can be used in a similar manner.","title":"Nanopore"},{"location":"examples/nanopore/#samplesheet","text":"See an example for the samplesheet below, the example shows the samplesheet for a run starting from the cram, but the 'sequencing_platform' can also be used to achieve the same for a run with the fastq workflow. individual_id sequencing_platform cram your_sample_id nanopore path/to/your/nanopore.cram","title":"Samplesheet"},{"location":"examples/nanopore/#run-the-pipeline","text":"cd vip vip --workflow cram --input path/to/samplesheet.tsv --output path/to/output/folder For an example on how to generate output for FASTQ files using the Oxford Nanopore platform see here .","title":"Run the pipeline"},{"location":"examples/reanalysis/","text":"Reanalysis \u00b6 The VCF workflow can be used to reanalyse data from previous runs with the pipeline. It is possible to start from the normalize, annotate, classify, filter, inheritance, classify_samples, filter_samples steps, this can for example be useful if you update one of your decision trees, or if you wish to re-run the inheritance matching with a different set of low-penetrance genes. For reanalysis the basics of running VIP remain the same, however the correct intermediate file should be provided as input in the sample sheet. Several intermediate results are available in the \"intermediates\" subfolder of your output folder. Furthermore the step from which you wish to start should be added in the configuration parameter \"vcf.start\"","title":"Reanalysis"},{"location":"examples/reanalysis/#reanalysis","text":"The VCF workflow can be used to reanalyse data from previous runs with the pipeline. 
It is possible to start from the normalize, annotate, classify, filter, inheritance, classify_samples, filter_samples steps, this can for example be useful if you update one of your decision trees, or if you wish to re-run the inheritance matching with a different set of low-penetrance genes. For reanalysis the basics of running VIP remain the same, however the correct intermediate file should be provided as input in the sample sheet. Several intermediate results are available in the \"intermediates\" subfolder of your output folder. Furthermore the step from which you wish to start should be added in the configuration parameter \"vcf.start\"","title":"Reanalysis"},{"location":"get_started/installation/","text":"Installation \u00b6 git clone https://github.com/molgenis/vip bash vip/install.sh","title":"Installation"},{"location":"get_started/installation/#installation","text":"git clone https://github.com/molgenis/vip bash vip/install.sh","title":"Installation"},{"location":"get_started/requirements/","text":"Requirements \u00b6 Before installing VIP please check whether your system meets the following requirements: GNU-based Linux (e.g. Ubuntu, Windows Subsystem for Linux ) with x86_64 architecture Bash \u2265 3.2 Java \u2265 11 Apptainer (setuid installation) 8GB RAM 1 220GB disk space 1) The memory requirements differ per workflow and depend on the size of your input data, the scheduler that you use, and the amount of parallelization. For example, executing VIP using a job scheduler will reduce the memory requirements on the system submitting the jobs to 1-2GB. Optional \u00b6 VIP auto-detects whether Slurm is available on the system and, if available, will schedule its jobs with Slurm. Otherwise, the jobs will be submitted on the local system.","title":"Requirements"},{"location":"get_started/requirements/#requirements","text":"Before installing VIP please check whether your system meets the following requirements: GNU-based Linux (e.g. 
Ubuntu, Windows Subsystem for Linux ) with x86_64 architecture Bash \u2265 3.2 Java \u2265 11 Apptainer (setuid installation) 8GB RAM 1 220GB disk space 1) The memory requirements differ per workflow and depend on the size of your input data, the scheduler that you use, and the amount of parallelization. For example, executing VIP using a job scheduler will reduce the memory requirements on the system submitting the jobs to 1-2GB.","title":"Requirements"},{"location":"get_started/requirements/#optional","text":"VIP auto-detects whether Slurm is available on the system and, if available, will schedule its jobs with Slurm. Otherwise, the jobs will be submitted on the local system.","title":"Optional"},{"location":"get_started/start_running/","text":"Start running \u00b6 After installation, it is time for a quick test to verify that VIP works using some test data. Input \u00b6 To run VIP you need to provide at least workflow , input and output arguments (described in detail here ). The following example processes a collection of .vcf files. cd vip vip --workflow vcf --input test/resources/multiproject.tsv --output output_multiproject Output \u00b6 Executing the above command displays progress until the pipeline completes. 
N E X T F L O W ~ version 22.10.6 Launching `vip_vcf.nf` [disturbed_khorana] DSL2 - revision: 8f8c80809c executor > local (27) [- ] process > samtools_index - [71/4bb8b5] process > vcf:convert (2) [100%] 5 of 5 \u2714 [c7/1f8dc7] process > vcf:index (1) [100%] 1 of 1 \u2714 [ad/51639f] process > vcf:stats (1) [100%] 2 of 2 \u2714 [54/a6c17d] process > vcf:merge_vcf (1) [100%] 1 of 1 \u2714 [a5/790ba1] process > vcf:merge_gvcf (1) [100%] 1 of 1 \u2714 [- ] process > vcf:split - [64/dafd8f] process > vcf:normalize (2) [100%] 2 of 2 \u2714 [c4/ed6e06] process > vcf:annotate (1) [100%] 2 of 2 \u2714 [43/c63075] process > vcf:classify (2) [100%] 2 of 2 \u2714 [66/3adcef] process > vcf:filter (2) [100%] 2 of 2 \u2714 [d1/1d89ee] process > vcf:inheritance (1) [100%] 2 of 2 \u2714 [d7/d717a0] process > vcf:classify_samples (1) [100%] 2 of 2 \u2714 [45/0564f9] process > vcf:filter_samples (1) [100%] 2 of 2 \u2714 [- ] process > vcf:concat - [- ] process > vcf:slice - [ad/fc2b6c] process > vcf:report (2) [100%] 3 of 3 \u2714 Duration : 1m 00s CPU hours : 0.2 Succeeded : 27 Results \u00b6 ls -1 output_multiproject/ The output folder contains one report for each project described in test/resources/multiproject.tsv . intermediates nxf_report.html nxf_timeline.html vip0.html vip0.vcf.gz vip0.vcf.gz.csi vip1.html vip1.vcf.gz vip1.vcf.gz.csi vip2.html vip2.vcf.gz vip2.vcf.gz.csi The files vip0.html , vip1.html and vip2.html can be opened in your browser and display an interactive report based on the corresponding .vcf.gz output files. The outputs are described in more detail here .","title":"Start running"},{"location":"get_started/start_running/#start-running","text":"After installation, it is time for a quick test to verify that VIP works using some test data.","title":"Start running"},{"location":"get_started/start_running/#input","text":"To run VIP you need to provide at least workflow , input and output arguments (described in detail here ). 
The following example processes a collection of .vcf files. cd vip vip --workflow vcf --input test/resources/multiproject.tsv --output output_multiproject","title":"Input"},{"location":"get_started/start_running/#output","text":"Executing the above command displays progress until the pipeline completes. N E X T F L O W ~ version 22.10.6 Launching `vip_vcf.nf` [disturbed_khorana] DSL2 - revision: 8f8c80809c executor > local (27) [- ] process > samtools_index - [71/4bb8b5] process > vcf:convert (2) [100%] 5 of 5 \u2714 [c7/1f8dc7] process > vcf:index (1) [100%] 1 of 1 \u2714 [ad/51639f] process > vcf:stats (1) [100%] 2 of 2 \u2714 [54/a6c17d] process > vcf:merge_vcf (1) [100%] 1 of 1 \u2714 [a5/790ba1] process > vcf:merge_gvcf (1) [100%] 1 of 1 \u2714 [- ] process > vcf:split - [64/dafd8f] process > vcf:normalize (2) [100%] 2 of 2 \u2714 [c4/ed6e06] process > vcf:annotate (1) [100%] 2 of 2 \u2714 [43/c63075] process > vcf:classify (2) [100%] 2 of 2 \u2714 [66/3adcef] process > vcf:filter (2) [100%] 2 of 2 \u2714 [d1/1d89ee] process > vcf:inheritance (1) [100%] 2 of 2 \u2714 [d7/d717a0] process > vcf:classify_samples (1) [100%] 2 of 2 \u2714 [45/0564f9] process > vcf:filter_samples (1) [100%] 2 of 2 \u2714 [- ] process > vcf:concat - [- ] process > vcf:slice - [ad/fc2b6c] process > vcf:report (2) [100%] 3 of 3 \u2714 Duration : 1m 00s CPU hours : 0.2 Succeeded : 27","title":"Output"},{"location":"get_started/start_running/#results","text":"ls -1 output_multiproject/ The output folder contains one report for each project described in test/resources/multiproject.tsv . intermediates nxf_report.html nxf_timeline.html vip0.html vip0.vcf.gz vip0.vcf.gz.csi vip1.html vip1.vcf.gz vip1.vcf.gz.csi vip2.html vip2.vcf.gz vip2.vcf.gz.csi The files vip0.html , vip1.html and vip2.html can be opened in your browser and display an interactive report based on the corresponding .vcf.gz output files. 
The outputs are described in more detail here .","title":"Results"},{"location":"help/frequently_asked_questions/","text":"Frequently asked questions \u00b6 Why doesn't my report contain any variants? \u00b6 VIP filters your input variants using classification trees for variant-effect and variant-sample combinations. Usually if your report doesn't contain any records this implies that they were filtered out based on these trees. Inspect the _classifications.vcf.gz files in the intermediates output folder to determine why a variant record was removed. Why does VIP fail with an Unexpected Error [InvocationTargetException] ? \u00b6 This issue can mean a number of things, check the .nxf.log for more details. One of the causes is a mismatch between the reference genome that was used to call the variants in your .vcf file and the reference genome used by VIP. For example: Your variants are called with a reference genome that differs from the default VIP reference genome Your variants are called with GRCh37 and you use the GRCh38 assembly or vice-versa Why does VIP fail with a file not found error but my file exists? \u00b6 You might need to update APPTAINER_BIND , for more details see here . To understand the cause of this issue take a look at the Apptainer documentation . Why does VIP fail with an exit code 137? \u00b6 A process has run out of memory. See the config documentation on how to update resource assignments for some or all processes. Why does the genome browser in the report not show all the reads for my structural variant? \u00b6 Since structural variants can be very large it is not possible to keep all reads for these variants in the report. 
The Cram file with all reads is produced as an intermediate result of VIP, and can be viewed using the desktop version of IGV .","title":"Frequently asked questions"},{"location":"help/frequently_asked_questions/#frequently-asked-questions","text":"","title":"Frequently asked questions"},{"location":"help/frequently_asked_questions/#why-doesnt-my-report-contain-any-variants","text":"VIP filters your input variants using classification trees for variant-effect and variant-sample combinations. Usually if your report doesn't contain any records this implies that they were filtered out based on these trees. Inspect the _classifications.vcf.gz files in the intermediates output folder to determine why a variant record was removed.","title":"Why doesn't my report contain any variants?"},{"location":"help/frequently_asked_questions/#why-does-vip-fail-with-an-unexpected-error-invocationtargetexception","text":"This issue can mean a number of things, check the .nxf.log for more details. One of the causes is a mismatch between the reference genome that was used to call the variants in your .vcf file and the reference genome used by VIP. For example: Your variants are called with a reference genome that differs from the default VIP reference genome Your variants are called with GRCh37 and you use the GRCh38 assembly or vice-versa","title":"Why does VIP fail with an Unexpected Error [InvocationTargetException]?"},{"location":"help/frequently_asked_questions/#why-does-vip-fail-with-a-file-not-found-error-but-my-file-exists","text":"You might need to update APPTAINER_BIND , for more details see here . To understand the cause of this issue take a look at the Apptainer documentation .","title":"Why does VIP fail with a file not found error but my file exists?"},{"location":"help/frequently_asked_questions/#why-does-vip-fail-with-an-exit-code-137","text":"A process has run out of memory. 
See the config documentation on how to update resource assignments for some or all processes.","title":"Why does VIP fail with an exit code 137?"},{"location":"help/frequently_asked_questions/#why-does-the-genome-browser-in-the-report-not-show-all-the-reads-for-my-structural-variant","text":"Since structural variants can be very large it is not possible to keep all reads for these variants in the report. The Cram file with all reads is produced as an intermediate result of VIP, and can be viewed using the desktop version of IGV .","title":"Why does the genome browser in the report not show all the reads for my structural variant?"},{"location":"help/issues/","text":"Issues \u00b6 Please use this link to report issues or ask questions. We do not have an e-mail address, forum or community chat at the moment. Known issues might be located in one of our VIP repositories: vip vip-decision-tree vip-inheritance vip-inheritance-matcher vip-report vip-report-api vip-report-template vip-report-vcf vip-utils capice vite-plugin-inline","title":"Issues"},{"location":"help/issues/#issues","text":"Please use this link to report issues or ask questions. We do not have an e-mail address, forum or community chat at the moment. Known issues might be located in one of our VIP repositories: vip vip-decision-tree vip-inheritance vip-inheritance-matcher vip-report vip-report-api vip-report-template vip-report-vcf vip-utils capice vite-plugin-inline","title":"Issues"},{"location":"home/key_features/","text":"Key features \u00b6 VIP is an easy to install, easy to use, portable and flexible pipeline implemented using Nextflow . 
Features include: Workflows for a broad range of input file types: bam , cram , fastq , g.vcf , vcf Produces stand-alone variant interpretation HTML report with integrated genome browser Long-read sequencing support (Oxford Nanopore, PacBio HiFi) Short-read sequencing support (Illumina, both single and paired-end reads) Supports GRCh38, supports GRCh37 and T2T via liftover Supports multiallelic variants Short variant detection Structural variant detection Short tandem repeat detection Copy number variant detection (Oxford Nanopore, PacBio HiFi) Consequence aware Rich set of variant annotations Pathogenic variant prioritization (CAPICE) Phenotype support (HPO) Inheritance matching (VIP inheritance matcher) Variant classification and filtration using customizable decision trees Variant reporting using customizable report templates Quick reanalysis","title":"Key features"},{"location":"home/key_features/#key-features","text":"VIP is an easy to install, easy to use, portable and flexible pipeline implemented using Nextflow . 
Features include: Workflows for a broad range of input file types: bam , cram , fastq , g.vcf , vcf Produces stand-alone variant interpretation HTML report with integrated genome browser Long-read sequencing support (Oxford Nanopore, PacBio HiFi) Short-read sequencing support (Illumina, both single and paired-end reads) Supports GRCh38, supports GRCh37 and T2T via liftover Supports multiallelic variants Short variant detection Structural variant detection Short tandem repeat detection Copy number variant detection (Oxford Nanopore, PacBio HiFi) Consequence aware Rich set of variant annotations Pathogenic variant prioritization (CAPICE) Phenotype support (HPO) Inheritance matching (VIP inheritance matcher) Variant classification and filtration using customizable decision trees Variant reporting using customizable report templates Quick reanalysis","title":"Key features"},{"location":"usage/command-line-options/","text":"Command-line options \u00b6 The vip command takes input vcf/cram/fastq data and produces a filtered annotated .vcf.gz containing candidate variants of interest. In addition to the .vcf.gz an interactive .html report is produced that can be displayed in any modern web browser. vip --help prints the available command-line options: usage: vip -w -i -o -w, --workflow workflow to execute. allowed values: cram, fastq, gvcf, vcf -i, --input path to sample sheet .tsv -o, --output output folder -c, --config path to additional nextflow .cfg (optional) -p, --profile nextflow configuration profile (optional) -r, --resume resume execution using cached results (default: false) -h, --help print this message and exit Required \u00b6 workflow as described here input as described here output as described here Optional \u00b6 config as described here profile the configuration profile to use. 
allowed values are local , slurm plus any profiles added in --config resume useful to continue executions that were stopped by an error using cached results Defaults \u00b6 By default vip : Assumes an Illumina sequencing platform was used to generate the input data Assumes whole-genome sequencing (WGS) method was used to generate the input data Uses a GRCh38 reference genome ( GCA_000001405.15 / GCF_000001405.26 ) Provides classification trees for default variant filtration. For details, see here Creates reports using a default report template. For details, see here","title":"Command-line options"},{"location":"usage/command-line-options/#command-line-options","text":"The vip command takes input vcf/cram/fastq data and produces a filtered annotated .vcf.gz containing candidate variants of interest. In addition to the .vcf.gz an interactive .html report is produced that can be displayed in any modern web browser. vip --help prints the available command-line options: usage: vip -w -i -o -w, --workflow workflow to execute. allowed values: cram, fastq, gvcf, vcf -i, --input path to sample sheet .tsv -o, --output output folder -c, --config path to additional nextflow .cfg (optional) -p, --profile nextflow configuration profile (optional) -r, --resume resume execution using cached results (default: false) -h, --help print this message and exit","title":"Command-line options"},{"location":"usage/command-line-options/#required","text":"workflow as described here input as described here output as described here","title":"Required"},{"location":"usage/command-line-options/#optional","text":"config as described here profile the configuration profile to use.
allowed values are local , slurm plus any profiles added in --config resume useful to continue executions that were stopped by an error using cached results","title":"Optional"},{"location":"usage/command-line-options/#defaults","text":"By default vip : Assumes an Illumina sequencing platform was used to generate the input data Assumes whole-genome sequencing (WGS) method was used to generate the input data Uses a GRCh38 reference genome ( GCA_000001405.15 / GCF_000001405.26 ) Provides classification trees for default variant filtration. For details, see here Creates reports using a default report template. For details, see here","title":"Defaults"},{"location":"usage/config/","text":"Config \u00b6 The VIP configuration is stored in Nextflow configuration files. An additional configuration file can be supplied on the command-line to overwrite default parameter values, add/update profiles, configure processes and update environment variables. Parameters \u00b6 key default description assembly GRCh38 output assembly, allowed values: [GRCh38] GRCh37.reference.chain.GRCh38 installed chain file to convert GRCh37 to GRCh38 data GRCh37.reference.fasta installed GRCh37.reference.fastaFai installed GRCh37.reference.fastaGzi installed GRCh38.reference.fasta installed GCA_000001405.15_GRCh38_no_alt_analysis_set GRCh38.reference.fastaFai installed GRCh38.reference.fastaGzi installed T2T.reference.chain.GRCh38 installed chain file to convert T2T to GRCh38 data T2T.reference.fasta T2T.reference.fastaFai T2T.reference.fastaGzi pcr_performed false Indication if PCR was performed to get the data, if so certain tools will be disabled due to not being compatible with this data. Warning: Please take note of the fact that for a different reference fasta.gz the unzipped reference fasta file is also required. Both the zipped and unzipped fasta should have an index.
FASTQ \u00b6 key default description GRCh38.reference.fastaMmi installed for details, see here fastp.options for details, see here minimap2.soft_clipping true In SAM output, use soft clipping for supplementary alignments (required when STR calling with Straglr) minimap2.nanopore_preset lr:hq Preset to use for aligning Nanopore data, options: 'lr:hq' 'map-ont'. CRAM \u00b6 key default description cnv.spectre.GRCh38.blacklist installed blacklist in bed format for sites that will be ignored cnv.spectre.GRCh38.metadata installed metadata file for Ns removal, update this file only when using a different GRCh38 version than the one provided by VIP. cram.call_snv true enable/disable the detection of short variants cram.call_str true enable/disable the detection of short tandem repeats cram.call_sv true enable/disable the detection of structural variants. disable this manually in case of non-paired-end Illumina data. snv.deeptrio.illumina.WES.model_name WES for details, see here snv.deeptrio.illumina.WGS.model_name WGS for details, see here snv.deeptrio.nanopore.model_name ONT for details, see here snv.deeptrio.pacbio_hifi.model_name PACBIO for details, see here snv.deepvariant.illumina.WES.model_name WES for details, see here snv.deepvariant.illumina.WGS.model_name WGS for details, see here snv.deepvariant.nanopore.model_name ONT_R104 for details, see here snv.deepvariant.pacbio_hifi.model_name PACBIO for details, see here snv.glnexus.WES.preset DeepVariantWES for details, see here . allowed values: [DeepVariant, DeepVariantWES, DeepVariantWES_MED_DP, DeepVariant_unfiltered] snv.glnexus.WGS.preset DeepVariantWGS for details, see here . allowed values: [DeepVariant, DeepVariantWGS, DeepVariant_unfiltered] str.expansionhunter.aligner dag-aligner for details, see here . allowed values: [dag-aligner, path-aligner] str.expansionhunter.analysis_mode streaming for details, see here . allowed values: [seeking , streaming] str.expansionhunter.log_level warn for details, see here . 
allowed values: [trace, debug, info, warn, or error] str.expansionhunter.region_extension_length 1000 for details, see here str.expansionhunter.GRCh38.variant_catalog installed for details, see here str.straglr.min_support 2 minimum number of support reads for an expansion to be captured in genome-scan, see here str.straglr.min_cluster_size 2 minimum number of reads required to constitute a cluster (allele) in GMM clustering, see here str.straglr.GRCh38.loci installed from here sv.cutesv.batches 10000000 Batch of genome segmentation interval sv.cutesv.gt_round 500 Maximum round of iteration for alignments searching if perform genotyping sv.cutesv.include_bed Only detect SVs in regions in the BED file sv.cutesv.ivcf Enable to perform force calling using the given vcf file sv.cutesv.max_size 100000 Maximum size of SV to be reported. All SVs are reported when using -1 sv.cutesv.max_split_parts 7 Maximum number of split segments a read may be aligned before it is ignored. All split segments are considered when using -1. (Recommend -1 when applying assembly-based alignment.) sv.cutesv.merge_del_threshold 0 Maximum distance of deletion signals to be merged sv.cutesv.merge_ins_threshold 100 Maximum distance of insertion signals to be merged sv.cutesv.min_mapq 20 Minimum mapping quality value of alignment to be taken into account (recommend 10 for force calling) sv.cutesv.min_read_len 500 Ignores reads that only report alignments with not longer than bp sv.cutesv.min_siglength 10 Minimum length of SV signal to be extracted sv.cutesv.min_size 30 Minimum size of SV to be reported sv.cutesv.min_support 2 Minimum number of reads that support a SV to be reported. Please note that the default is lower than the default of cuteSV itself to prevent missed SV calls.
sv.cutesv.read_range 1000 The interval range for counting reads distribution sv.cutesv.report_readid false Enable to report supporting read ids for each SV sv.cutesv.retain_work_dir false Enable to retain temporary folder and files sv.cutesv.write_old_sigs false Enable to output temporary sig files sv.cutesv.nanopore.diff_ratio_filtering_TRA 0.6 Filter breakpoints with basepair identity less than for translocation sv.cutesv.nanopore.diff_ratio_merging_DEL 0.3 Do not merge breakpoints with basepair identity more than for deletion sv.cutesv.nanopore.diff_ratio_merging_INS 0.3 Do not merge breakpoints with basepair identity more than for insertion sv.cutesv.nanopore.max_cluster_bias_DEL 100 Maximum distance to cluster read together for deletion sv.cutesv.nanopore.max_cluster_bias_DUP 500 Maximum distance to cluster read together for duplication sv.cutesv.nanopore.max_cluster_bias_INS 100 Maximum distance to cluster read together for insertion sv.cutesv.nanopore.max_cluster_bias_INV 500 Maximum distance to cluster read together for inversion sv.cutesv.nanopore.max_cluster_bias_TRA 50 Maximum distance to cluster read together for translocation sv.cutesv.nanopore.remain_reads_ratio 1.0 The ratio of reads remained in cluster. 
Set lower when the alignment data have high quality but recommend over 0.5 sv.cutesv.pacbio_hifi.diff_ratio_filtering_TRA 0.6 Filter breakpoints with basepair identity less than for translocation sv.cutesv.pacbio_hifi.diff_ratio_merging_DEL 0.5 Do not merge breakpoints with basepair identity more than for deletion sv.cutesv.pacbio_hifi.diff_ratio_merging_INS 0.9 Do not merge breakpoints with basepair identity more than for insertion sv.cutesv.pacbio_hifi.max_cluster_bias_DEL 1000 Maximum distance to cluster read together for deletion sv.cutesv.pacbio_hifi.max_cluster_bias_DUP 500 Maximum distance to cluster read together for duplication sv.cutesv.pacbio_hifi.max_cluster_bias_INS 1000 Maximum distance to cluster read together for insertion sv.cutesv.pacbio_hifi.max_cluster_bias_INV 500 Maximum distance to cluster read together for inversion sv.cutesv.pacbio_hifi.max_cluster_bias_TRA 50 Maximum distance to cluster read together for translocation sv.cutesv.pacbio_hifi.remain_reads_ratio 1.0 The ratio of reads remained in cluster. Set lower when the alignment data have high quality but recommend over 0.5 gVCF \u00b6 key default description gvcf.merge_preset DeepVariant allowed values: [gatk, gatk_unfiltered, DeepVariant, DeepVariant_unfiltered] VCF \u00b6 key default description vcf.start allowed values: [normalize, annotate, classify, filter, inheritance, classify_samples, filter_samples]. for reanalysis this defines from which step to start the workflow vcf.annotate.annotsv_cache_dir installed vcf.annotate.ensembl_gene_mapping installed vcf.annotate.vep_buffer_size 1000 for details, see here vcf.annotate.vep_cache_dir installed vcf.annotate.vep_plugin_dir installed vcf.annotate.vep_plugin_hpo installed vcf.annotate.vep_plugin_inheritance installed vcf.annotate.vep_plugin_vkgl_mode 1 allowed values: [0=full VKGL, 1=public VKGL].
update vcf.annotate.GRCh38.vep_plugin_vkgl accordingly vcf.annotate.GRCh38.capice_model installed vcf.annotate.GRCh38.stranger_catalog installed for details, see here vcf.annotate.GRCh38.vep_custom_phylop installed vcf.annotate.GRCh38.vep_plugin_clinvar installed vcf.annotate.GRCh38.vep_plugin_gnomad installed vcf.annotate.GRCh38.vep_plugin_green_db_enabled false enabling is only allowed for academic use, for details see here vcf.annotate.GRCh38.vep_plugin_green_db installed vcf.annotate.GRCh38.vep_plugin_spliceai_indel installed vcf.annotate.GRCh38.vep_plugin_spliceai_snv installed vcf.annotate.GRCh38.vep_plugin_utrannotator installed vcf.annotate.GRCh38.vep_plugin_vkgl installed update vcf.annotate.vep_plugin_vkgl_mode accordingly vcf.classify.annotate_path 1 allowed values: [0=false, 1=true]. annotate variant-consequences with classification tree path vcf.classify.GRCh38.decision_tree installed for details, see here vcf.classify_samples.annotate_path 1 allowed values: [0=false, 1=true]. annotate variant-consequences per sample with classification tree path vcf.classify_samples.GRCh38.decision_tree installed for details, see here vcf.filter.classes VUS,LP,P for details, see here vcf.filter.consequences true allowed values: [true, false]. true: filter individual consequences, false: keep all consequences for a variant if one consequence filter passes. vcf.filter_samples.classes U1,U2 for details, see here vcf.report.gado_genes installed vcf.report.gado_hpo installed vcf.report.gado_predict_info installed vcf.report.gado_predict_matrix installed vcf.report.include_crams true allowed values: [true, false]. true: include cram files in the report for showing alignments in the genome browser, false: do not include the crams in the report, no alignments are shown in the genome browser. This will result in a smaller report size.
vcf.report.max_records vcf.report.max_samples vcf.report.config vcf.report.template configuration file vcf.report.template for details, see here vcf.report.GRCh38.genes installed Profiles \u00b6 VIP pre-defines two profiles. The default profile is Slurm with fallback to local in case Slurm cannot be discovered. key description local for details, see here slurm for details, see here Additional profiles (for details, see here ) can be added to your configuration file and used on the command-line, for example to run VIP on the Amazon, Azure or Google Cloud. Process \u00b6 By default, each process gets assigned 4 cpus , 8GB of memory and a max runtime of 4 hours . Depending on your system specifications and your analysis you might need to use updated values. For information on how to update process configuration see the Nextflow documentation . The following sections list all processes and their non-default configuration. FASTQ \u00b6 process label configuration concat_fastq default concat_fastq_paired_end default minimap2_align cpus=8 memory='16GB' time='23h' minimap2_align_paired_end cpus=8 memory='16GB' time='23h' CRAM \u00b6 process label configuration concat_vcf default cram_validate default cutesv_call cpus=4 memory='8GB' time='5h' deepvariant_call cpus= default memory='2GB * cpus' time='5h' deepvariant_call_duo cpus= default memory='4GB * cpus' time='5h' deepvariant_call_trio cpus= default memory='4GB * cpus' time='5h' deepvariant_concat_gvcf cpus= default memory='2GB' time='30m' deepvariant_concat_vcf cpus= default memory='2GB' time='30m' deepvariant_joint_call cpus= default memory='2GB' time='30m' expansionhunter_call cpus=4 memory='16GB' time='5h' manta_joint_call cpus=4 memory='8GB' time='5h' straglr_call default vcf_merge_str default vcf_merge_sv default gVCF \u00b6 process label configuration gvcf_liftover default gvcf_validate memory='100MB' time='30m' gvcf_merge memory='2GB' time='30m' VCF \u00b6 process label configuration vcf_annotate cpus=4 
memory='8GB' time='4h' vcf_annotate_publish default vcf_classify memory = '2GB' vcf_classify_publish default vcf_classify_samples memory = '2GB' vcf_classify_samples_publish default vcf_concat default vcf_filter default vcf_filter_samples default vcf_inheritance memory = '2GB' vcf_liftover default vcf_normalize default vcf_report memory = '4GB' vcf_slice default vcf_split memory='100MB' time='30m' vcf_validate memory='100MB' time='30m' Environment \u00b6 See https://github.com/molgenis/vip/tree/main/config for an overview of available environment variables. Notably, this allows the use of different Apptainer containers for the tools that VIP relies on.","title":"Config"},{"location":"usage/config/#config","text":"The VIP configuration is stored in Nextflow configuration files. An additional configuration file can be supplied on the command-line to overwrite default parameter values, add/update profiles, configure processes and update environment variables.","title":"Config"},{"location":"usage/config/#parameters","text":"key default description assembly GRCh38 output assembly, allowed values: [GRCh38] GRCh37.reference.chain.GRCh38 installed chain file to convert GRCh37 to GRCh38 data GRCh37.reference.fasta installed GRCh37.reference.fastaFai installed GRCh37.reference.fastaGzi installed GRCh38.reference.fasta installed GCA_000001405.15_GRCh38_no_alt_analysis_set GRCh38.reference.fastaFai installed GRCh38.reference.fastaGzi installed T2T.reference.chain.GRCh38 installed chain file to convert T2T to GRCh38 data T2T.reference.fasta T2T.reference.fastaFai T2T.reference.fastaGzi pcr_performed false Indication if PCR was performed to get the data, if so certain tools will be disabled due to not being compatible with this data. Warning: Please take note of the fact that for a different reference fasta.gz the unzipped reference fasta file is also required.
Both the zipped and unzipped fasta should have an index.","title":"Parameters"},{"location":"usage/config/#fastq","text":"key default description GRCh38.reference.fastaMmi installed for details, see here fastp.options for details, see here minimap2.soft_clipping true In SAM output, use soft clipping for supplementary alignments (required when STR calling with Straglr) minimap2.nanopore_preset lr:hq Preset to use for aligning Nanopore data, options: 'lr:hq' 'map-ont'.","title":"FASTQ"},{"location":"usage/config/#cram","text":"key default description cnv.spectre.GRCh38.blacklist installed blacklist in bed format for sites that will be ignored cnv.spectre.GRCh38.metadata installed metadata file for Ns removal, update this file only when using a different GRCh38 version than the one provided by VIP. cram.call_snv true enable/disable the detection of short variants cram.call_str true enable/disable the detection of short tandem repeats cram.call_sv true enable/disable the detection of structural variants. disable this manually in case of non-paired-end Illumina data. snv.deeptrio.illumina.WES.model_name WES for details, see here snv.deeptrio.illumina.WGS.model_name WGS for details, see here snv.deeptrio.nanopore.model_name ONT for details, see here snv.deeptrio.pacbio_hifi.model_name PACBIO for details, see here snv.deepvariant.illumina.WES.model_name WES for details, see here snv.deepvariant.illumina.WGS.model_name WGS for details, see here snv.deepvariant.nanopore.model_name ONT_R104 for details, see here snv.deepvariant.pacbio_hifi.model_name PACBIO for details, see here snv.glnexus.WES.preset DeepVariantWES for details, see here . allowed values: [DeepVariant, DeepVariantWES, DeepVariantWES_MED_DP, DeepVariant_unfiltered] snv.glnexus.WGS.preset DeepVariantWGS for details, see here . allowed values: [DeepVariant, DeepVariantWGS, DeepVariant_unfiltered] str.expansionhunter.aligner dag-aligner for details, see here . 
allowed values: [dag-aligner, path-aligner] str.expansionhunter.analysis_mode streaming for details, see here . allowed values: [seeking , streaming] str.expansionhunter.log_level warn for details, see here . allowed values: [trace, debug, info, warn, or error] str.expansionhunter.region_extension_length 1000 for details, see here str.expansionhunter.GRCh38.variant_catalog installed for details, see here str.straglr.min_support 2 minimum number of support reads for an expansion to be captured in genome-scan, see here str.straglr.min_cluster_size 2 minimum number of reads required to constitute a cluster (allele) in GMM clustering, see here str.straglr.GRCh38.loci installed from here sv.cutesv.batches 10000000 Batch of genome segmentation interval sv.cutesv.gt_round 500 Maximum round of iteration for alignments searching if perform genotyping sv.cutesv.include_bed Only detect SVs in regions in the BED file sv.cutesv.ivcf Enable to perform force calling using the given vcf file sv.cutesv.max_size 100000 Maximum size of SV to be reported. All SVs are reported when using -1 sv.cutesv.max_split_parts 7 Maximum number of split segments a read may be aligned before it is ignored. All split segments are considered when using -1. (Recommend -1 when applying assembly-based alignment.) sv.cutesv.merge_del_threshold 0 Maximum distance of deletion signals to be merged sv.cutesv.merge_ins_threshold 100 Maximum distance of insertion signals to be merged sv.cutesv.min_mapq 20 Minimum mapping quality value of alignment to be taken into account (recommend 10 for force calling) sv.cutesv.min_read_len 500 Ignores reads that only report alignments with not longer than bp sv.cutesv.min_siglength 10 Minimum length of SV signal to be extracted sv.cutesv.min_size 30 Minimum size of SV to be reported sv.cutesv.min_support 2 Minimum number of reads that support a SV to be reported. Please note that the default is lower than the default of cuteSV itself to prevent missed SV calls.
sv.cutesv.read_range 1000 The interval range for counting reads distribution sv.cutesv.report_readid false Enable to report supporting read ids for each SV sv.cutesv.retain_work_dir false Enable to retain temporary folder and files sv.cutesv.write_old_sigs false Enable to output temporary sig files sv.cutesv.nanopore.diff_ratio_filtering_TRA 0.6 Filter breakpoints with basepair identity less than for translocation sv.cutesv.nanopore.diff_ratio_merging_DEL 0.3 Do not merge breakpoints with basepair identity more than for deletion sv.cutesv.nanopore.diff_ratio_merging_INS 0.3 Do not merge breakpoints with basepair identity more than for insertion sv.cutesv.nanopore.max_cluster_bias_DEL 100 Maximum distance to cluster read together for deletion sv.cutesv.nanopore.max_cluster_bias_DUP 500 Maximum distance to cluster read together for duplication sv.cutesv.nanopore.max_cluster_bias_INS 100 Maximum distance to cluster read together for insertion sv.cutesv.nanopore.max_cluster_bias_INV 500 Maximum distance to cluster read together for inversion sv.cutesv.nanopore.max_cluster_bias_TRA 50 Maximum distance to cluster read together for translocation sv.cutesv.nanopore.remain_reads_ratio 1.0 The ratio of reads remained in cluster. 
Set lower when the alignment data have high quality but recommend over 0.5 sv.cutesv.pacbio_hifi.diff_ratio_filtering_TRA 0.6 Filter breakpoints with basepair identity less than for translocation sv.cutesv.pacbio_hifi.diff_ratio_merging_DEL 0.5 Do not merge breakpoints with basepair identity more than for deletion sv.cutesv.pacbio_hifi.diff_ratio_merging_INS 0.9 Do not merge breakpoints with basepair identity more than for insertion sv.cutesv.pacbio_hifi.max_cluster_bias_DEL 1000 Maximum distance to cluster read together for deletion sv.cutesv.pacbio_hifi.max_cluster_bias_DUP 500 Maximum distance to cluster read together for duplication sv.cutesv.pacbio_hifi.max_cluster_bias_INS 1000 Maximum distance to cluster read together for insertion sv.cutesv.pacbio_hifi.max_cluster_bias_INV 500 Maximum distance to cluster read together for inversion sv.cutesv.pacbio_hifi.max_cluster_bias_TRA 50 Maximum distance to cluster read together for translocation sv.cutesv.pacbio_hifi.remain_reads_ratio 1.0 The ratio of reads remained in cluster. Set lower when the alignment data have high quality but recommend over 0.5","title":"CRAM"},{"location":"usage/config/#gvcf","text":"key default description gvcf.merge_preset DeepVariant allowed values: [gatk, gatk_unfiltered, DeepVariant, DeepVariant_unfiltered]","title":"gVCF"},{"location":"usage/config/#vcf","text":"key default description vcf.start allowed values: [normalize, annotate, classify, filter, inheritance, classify_samples, filter_samples]. for reanalysis this defines from which step to start the workflow vcf.annotate.annotsv_cache_dir installed vcf.annotate.ensembl_gene_mapping installed vcf.annotate.vep_buffer_size 1000 for details, see here vcf.annotate.vep_cache_dir installed vcf.annotate.vep_plugin_dir installed vcf.annotate.vep_plugin_hpo installed vcf.annotate.vep_plugin_inheritance installed vcf.annotate.vep_plugin_vkgl_mode 1 allowed values: [0=full VKGL, 1=public VKGL].
update vcf.annotate.GRCh38.vep_plugin_vkgl accordingly vcf.annotate.GRCh38.capice_model installed vcf.annotate.GRCh38.stranger_catalog installed for details, see here vcf.annotate.GRCh38.vep_custom_phylop installed vcf.annotate.GRCh38.vep_plugin_clinvar installed vcf.annotate.GRCh38.vep_plugin_gnomad installed vcf.annotate.GRCh38.vep_plugin_green_db_enabled false enabling is only allowed for academic use, for details see here vcf.annotate.GRCh38.vep_plugin_green_db installed vcf.annotate.GRCh38.vep_plugin_spliceai_indel installed vcf.annotate.GRCh38.vep_plugin_spliceai_snv installed vcf.annotate.GRCh38.vep_plugin_utrannotator installed vcf.annotate.GRCh38.vep_plugin_vkgl installed update vcf.annotate.vep_plugin_vkgl_mode accordingly vcf.classify.annotate_path 1 allowed values: [0=false, 1=true]. annotate variant-consequences with classification tree path vcf.classify.GRCh38.decision_tree installed for details, see here vcf.classify_samples.annotate_path 1 allowed values: [0=false, 1=true]. annotate variant-consequences per sample with classification tree path vcf.classify_samples.GRCh38.decision_tree installed for details, see here vcf.filter.classes VUS,LP,P for details, see here vcf.filter.consequences true allowed values: [true, false]. true: filter individual consequences, false: keep all consequences for a variant if one consequence filter passes. vcf.filter_samples.classes U1,U2 for details, see here vcf.report.gado_genes installed vcf.report.gado_hpo installed vcf.report.gado_predict_info installed vcf.report.gado_predict_matrix installed vcf.report.include_crams true allowed values: [true, false]. true: include cram files in the report for showing alignments in the genome browser, false: do not include the crams in the report, no aligments are shown in the genome browser. This will result in a smaller report size. 
vcf.report.max_records vcf.report.max_samples vcf.report.config vcf.report.template configuration file vcf.report.template for details, see here vcf.report.GRCh38.genes installed","title":"VCF"},{"location":"usage/config/#profiles","text":"VIP pre-defines two profiles. The default profile is Slurm with fallback to local in case Slurm cannot be discovered. key description local for details, see here slurm for details, see here Additional profiles (for details, see here ) can be added to your configuration file and used on the command-line, for example to run VIP on the Amazon, Azure or Google Cloud.","title":"Profiles"},{"location":"usage/config/#process","text":"By default, each process gets assigned 4 cpus , 8GB of memory and a max runtime of 4 hours . Depending on your system specifications and your analysis you might need to use updated values. For information on how to update process configuration see the Nextflow documentation . The following sections list all processes and their non-default configuration.","title":"Process"},{"location":"usage/config/#fastq_1","text":"process label configuration concat_fastq default concat_fastq_paired_end default minimap2_align cpus=8 memory='16GB' time='23h' minimap2_align_paired_end cpus=8 memory='16GB' time='23h'","title":"FASTQ"},{"location":"usage/config/#cram_1","text":"process label configuration concat_vcf default cram_validate default cutesv_call cpus=4 memory='8GB' time='5h' deepvariant_call cpus= default memory='2GB * cpus' time='5h' deepvariant_call_duo cpus= default memory='4GB * cpus' time='5h' deepvariant_call_trio cpus= default memory='4GB * cpus' time='5h' deepvariant_concat_gvcf cpus= default memory='2GB' time='30m' deepvariant_concat_vcf cpus= default memory='2GB' time='30m' deepvariant_joint_call cpus= default memory='2GB' time='30m' expansionhunter_call cpus=4 memory='16GB' time='5h' manta_joint_call cpus=4 memory='8GB' time='5h' straglr_call default vcf_merge_str default vcf_merge_sv 
default","title":"CRAM"},{"location":"usage/config/#gvcf_1","text":"process label configuration gvcf_liftover default gvcf_validate memory='100MB' time='30m' gvcf_merge memory='2GB' time='30m'","title":"gVCF"},{"location":"usage/config/#vcf_1","text":"process label configuration vcf_annotate cpus=4 memory='8GB' time='4h' vcf_annotate_publish default vcf_classify memory = '2GB' vcf_classify_publish default vcf_classify_samples memory = '2GB' vcf_classify_samples_publish default vcf_concat default vcf_filter default vcf_filter_samples default vcf_inheritance memory = '2GB' vcf_liftover default vcf_normalize default vcf_report memory = '4GB' vcf_slice default vcf_split memory='100MB' time='30m' vcf_validate memory='100MB' time='30m'","title":"VCF"},{"location":"usage/config/#environment","text":"See https://github.com/molgenis/vip/tree/main/config for an overview of available environment variables. Notably this allows to use different Apptainer containers for the tools that VIP relies on.","title":"Environment"},{"location":"usage/input/","text":"Input \u00b6 The --input value is a tab-separated file (sample-sheet) with each row describing the data and metadata of a sample. A minimal sample-sheet for the vcf workflow could look like this: individual_id vcf sample0 sample0.vcf.gz sample1 sample1.vcf.gz sample2 sample2.vcf.gz Sample-sheet values are case sensitive. Columns can contain values of different types: type description boolean allowed values: [ true , false ] enum categorical value file absolute file path or file path relative to the sample sheet file list comma-separated list of file paths string text string list comma-separated list of strings The following sections describe the columns that can be used in every sample-sheet followed by workflow specific columns. 
Columns \u00b6 column type required default description project_id string vip project identifier, see here family_id string fam family identifier individual_id string yes sample identifier of the individual paternal_id string sample identifier of the father maternal_id string sample identifier of the mother sex enum unknown sex values: [male,female] Please note that an unknown sex leads to a Spectre CNV analysis that assumes female for the ploidy determination of chromosome X. affected boolean unknown affected status whether the individual is affected proband boolean depends 1 individual being reported on hpo_ids string list regex: /HP:\\d{7}/ sequencing_method enum WGS allowed values: [ WES , WGS ], value must be the same for all project samples regions file allowed file extensions: [ bed ]. filter variants overlapping with regions in bed file 2 1 Exception: if no probands are defined in the sample-sheet then all samples are considered to be probands. Columns: FASTQ \u00b6 column type required default description adaptive_sampling file allowed file extensions: [ csv ]. for nanopore adaptive sampling experiments, used to filter stop_receiving reads fastq file list yes 3 allowed file extensions: [ fastq , fastq.gz , fq , fq.gz ]. single-reads file(s) fastq_r1 file list yes 3 allowed file extensions: [ fastq , fastq.gz , fq , fq.gz ]. paired-end reads file(s) #1 fastq_r2 file list yes 3 allowed file extensions: [ fastq , fastq.gz , fq , fq.gz ]. paired-end reads file(s) #2 sequencing_platform enum nanopore allowed values: [ illumina , nanopore , pacbio_hifi ], value must be the same for all project samples 3 Either the fastq or the fastq_r1 and fastq_r2 are required. 
Columns: CRAM \u00b6 column type required default description cram file yes allowed file extensions: [ bam , cram , sam ] sequencing_platform enum illumina allowed values: [ illumina , nanopore , pacbio_hifi ], value must be the same for all project samples Columns: gVCF \u00b6 column type required default description assembly enum GRCh38 allowed values: [ GRCh37 , GRCh38 , T2T ] gvcf file yes allowed file extensions: [ gvcf , gvcf.gz , gvcf.bgz , vcf , vcf.gz , vcf.bgz , bcf , bcf.gz , bcf.bgz ] cram file allowed file extensions: [ bam , cram , sam ] Columns: VCF \u00b6 column type required default description assembly enum GRCh38 allowed values: [ GRCh37 , GRCh38 , T2T ], value must be the same for all project samples vcf file yes allowed file extensions: [ vcf , vcf.gz , vcf.bgz , bcf , bcf.gz , bcf.bgz ], value must be the same for all project samples cram file allowed file extensions: [ bam , cram , sam ]","title":"Input"},{"location":"usage/input/#input","text":"The --input value is a tab-separated file (sample-sheet) with each row describing the data and metadata of a sample. A minimal sample-sheet for the vcf workflow could look like this: individual_id vcf sample0 sample0.vcf.gz sample1 sample1.vcf.gz sample2 sample2.vcf.gz Sample-sheet values are case sensitive. 
Columns can contain values of different types: type description boolean allowed values: [ true , false ] enum categorical value file absolute file path or file path relative to the sample sheet file list comma-separated list of file paths string text string list comma-separated list of strings The following sections describe the columns that can be used in every sample-sheet followed by workflow specific columns.","title":"Input"},{"location":"usage/input/#columns","text":"column type required default description project_id string vip project identifier, see here family_id string fam family identifier individual_id string yes sample identifier of the individual paternal_id string sample identifier of the father maternal_id string sample identifier of the mother sex enum unknown sex values: [male,female] Please note that an unknown sex leads to a Spectre CNV analysis that assumes female for the ploidy determination of chromosome X. affected boolean unknown affected status whether the individual is affected proband boolean depends 1 individual being reported on hpo_ids string list regex: /HP:\\d{7}/ sequencing_method enum WGS allowed values: [ WES , WGS ], value must be the same for all project samples regions file allowed file extensions: [ bed ]. filter variants overlapping with regions in bed file 2 1 Exception: if no probands are defined in the sample-sheet then all samples are considered to be probands.","title":"Columns"},{"location":"usage/input/#columns-fastq","text":"column type required default description adaptive_sampling file allowed file extensions: [ csv ]. for nanopore adaptive sampling experiments, used to filter stop_receiving reads fastq file list yes 3 allowed file extensions: [ fastq , fastq.gz , fq , fq.gz ]. single-reads file(s) fastq_r1 file list yes 3 allowed file extensions: [ fastq , fastq.gz , fq , fq.gz ]. paired-end reads file(s) #1 fastq_r2 file list yes 3 allowed file extensions: [ fastq , fastq.gz , fq , fq.gz ]. 
paired-end reads file(s) #2 sequencing_platform enum nanopore allowed values: [ illumina , nanopore , pacbio_hifi ], value must be the same for all project samples 3 Either the fastq or the fastq_r1 and fastq_r2 are required.","title":"Columns: FASTQ"},{"location":"usage/input/#columns-cram","text":"column type required default description cram file yes allowed file extensions: [ bam , cram , sam ] sequencing_platform enum illumina allowed values: [ illumina , nanopore , pacbio_hifi ], value must be the same for all project samples","title":"Columns: CRAM"},{"location":"usage/input/#columns-gvcf","text":"column type required default description assembly enum GRCh38 allowed values: [ GRCh37 , GRCh38 , T2T ] gvcf file yes allowed file extensions: [ gvcf , gvcf.gz , gvcf.bgz , vcf , vcf.gz , vcf.bgz , bcf , bcf.gz , bcf.bgz ] cram file allowed file extensions: [ bam , cram , sam ]","title":"Columns: gVCF"},{"location":"usage/input/#columns-vcf","text":"column type required default description assembly enum GRCh38 allowed values: [ GRCh37 , GRCh38 , T2T ], value must be the same for all project samples vcf file yes allowed file extensions: [ vcf , vcf.gz , vcf.bgz , bcf , bcf.gz , bcf.bgz ], value must be the same for all project samples cram file allowed file extensions: [ bam , cram , sam ]","title":"Columns: VCF"},{"location":"usage/output/","text":"Output \u00b6 Click here for a live example After VIP completes successfully the path specified by --output contains content similar to: .nextflow .nxf.home .nxf.log .nxf.tmp .nxf.work intermediates nxf_report.html nxf_timeline.html my_project_id.html my_project_id.vcf.gz my_project_id.vcf.gz.csi Report \u00b6 For each project defined in your --input sample-sheet a set of three files is created: my_project.html my_project.vcf.gz my_project.vcf.gz.csi In case no project identifiers were supplied these files will be called: vip.html vip.vcf.gz vip.vcf.gz.csi vip.html is an interactive report based on vip.vcf.gz that can be 
viewed in any modern browser vip.vcf.gz contains annotated candidate variants for interpretation vip.vcf.gz.csi is the corresponding index file By default, the report is a self-contained .html file that does not depend on external websites. All data and code to interact with and display this data is contained in one file. This ensures that no internet connection is required to view the report and enables easy sharing with other people. Live example #0 Live example #0 Live example #0 Above: report example Intermediates \u00b6 VIP publishes selected intermediate results to allow reanalysis using the vcf.start parameter . Additionaly these results can be used to understand why variant records did not make it into the report. The content of the intermediates directory depends on the used --workflow and looks similar to: hlhs_famA_grch38_annotations.vcf.gz hlhs_famA_grch38_annotations.vcf.gz.csi hlhs_famA_grch38_classifications.vcf.gz hlhs_famA_grch38_classifications.vcf.gz.csi hlhs_famA_grch38_famA_sample0_small_variants.vcf.gz hlhs_famA_grch38_famA_sample0_small_variants.vcf.gz.csi hlhs_famA_grch38_famA_sample0_sv.vcf.gz hlhs_famA_grch38_famA_sample0_sv.vcf.gz.csi hlhs_famA_grch38_famA_sample1_small_variants.vcf.gz hlhs_famA_grch38_famA_sample1_small_variants.vcf.gz.csi hlhs_famA_grch38_famA_sample1_sv.vcf.gz hlhs_famA_grch38_famA_sample1_sv.vcf.gz.csi hlhs_famA_grch38_famA_sample2_small_variants.vcf.gz hlhs_famA_grch38_famA_sample2_small_variants.vcf.gz.csi hlhs_famA_grch38_famA_sample2_sv.vcf.gz hlhs_famA_grch38_famA_sample2_sv.vcf.gz.csi Other \u00b6 Besides the result files and intermediate files the following data is generated: .nextflow .nxf.home .nxf.log .nxf.tmp .nxf.work nxf_report.html nxf_timeline.html For details, see the Nextflow documentation .","title":"Output"},{"location":"usage/output/#output","text":"Click here for a live example After VIP completes successfully the path specified by --output contains content similar to: .nextflow .nxf.home .nxf.log 
.nxf.tmp .nxf.work intermediates nxf_report.html nxf_timeline.html my_project_id.html my_project_id.vcf.gz my_project_id.vcf.gz.csi","title":"Output"},{"location":"usage/output/#report","text":"For each project defined in your --input sample-sheet a set of three files is created: my_project.html my_project.vcf.gz my_project.vcf.gz.csi In case no project identifiers were supplied these files will be called: vip.html vip.vcf.gz vip.vcf.gz.csi vip.html is an interactive report based on vip.vcf.gz that can be viewed in any modern browser vip.vcf.gz contains annotated candidate variants for interpretation vip.vcf.gz.csi is the corresponding index file By default, the report is a self-contained .html file that does not depend on external websites. All data and code to interact with and display this data is contained in one file. This ensures that no internet connection is required to view the report and enables easy sharing with other people. Live example #0 Live example #0 Live example #0 Above: report example","title":"Report"},{"location":"usage/output/#intermediates","text":"VIP publishes selected intermediate results to allow reanalysis using the vcf.start parameter . Additionaly these results can be used to understand why variant records did not make it into the report. 
The content of the intermediates directory depends on the used --workflow and looks similar to: hlhs_famA_grch38_annotations.vcf.gz hlhs_famA_grch38_annotations.vcf.gz.csi hlhs_famA_grch38_classifications.vcf.gz hlhs_famA_grch38_classifications.vcf.gz.csi hlhs_famA_grch38_famA_sample0_small_variants.vcf.gz hlhs_famA_grch38_famA_sample0_small_variants.vcf.gz.csi hlhs_famA_grch38_famA_sample0_sv.vcf.gz hlhs_famA_grch38_famA_sample0_sv.vcf.gz.csi hlhs_famA_grch38_famA_sample1_small_variants.vcf.gz hlhs_famA_grch38_famA_sample1_small_variants.vcf.gz.csi hlhs_famA_grch38_famA_sample1_sv.vcf.gz hlhs_famA_grch38_famA_sample1_sv.vcf.gz.csi hlhs_famA_grch38_famA_sample2_small_variants.vcf.gz hlhs_famA_grch38_famA_sample2_small_variants.vcf.gz.csi hlhs_famA_grch38_famA_sample2_sv.vcf.gz hlhs_famA_grch38_famA_sample2_sv.vcf.gz.csi","title":"Intermediates"},{"location":"usage/output/#other","text":"Besides the result files and intermediate files the following data is generated: .nextflow .nxf.home .nxf.log .nxf.tmp .nxf.work nxf_report.html nxf_timeline.html For details, see the Nextflow documentation .","title":"Other"},{"location":"usage/workflow/","text":"Workflow \u00b6 VIP consists of four workflows depending on the type of input data: fastq, bam/cram, gvcf or vcf. The fastq workflow is an extension of the cram workflow. The cram and gvcf workflows are extensions of the vcf workflow. The vcf workflow produces the pipeline outputs as described here . The following sections provide an overview of the steps of each of these workflows. FASTQ \u00b6 The fastq workflow consists of the following steps: Parallelize sample sheet per sample and for each sample Quality reporting and preprocessing using fastp Alignment using minimap2 producing a cram file per sample In case of multiple fastq files per sample, concatenate the cram output files Continue with step 3. of the cram workflow For details, see here . 
CRAM \u00b6 The cram workflow consists of the following steps: Parallelize sample sheet per sample and for each sample Create validated, indexed .bam file from bam/cram/sam input If a bed file was provide via the sample sheet: generate coverage metrics using MosDepth Discover short tandem repeats and publish as intermediate result. Using ExpansionHunter for Illumina short read data. Using this fork of Straglr for PacBio and Nanopore long read data, this is a fork of this fork(https://github.com/philres/straglr) and is chosen over the original Straglr because of the VCF output that enables VIP to combine it with the SV and SNV data in the VCF workflow. Discover copy number variants for for PacBio and Nanopore long read data using Spectre data and publish as intermediate result. Parallelize cram in chunks consisting of one or more contigs and for each chunk Perform short variant calling with DeepVariant producing a gvcf file per chunk per sample, the gvcfs of the samples in a project are than merged to one vcf per project (using GLnexus . Perform structural variant calling with Manta or cuteSV producing a vcf file per chunk per project. Concatenate short variant calling and structural variant calling vcf files per chunk per sample Continue with step 3. of the vcf workflow For details, see here . gVCF \u00b6 The gvcf workflow consists of the following steps: For each project in the sample sheet Create validated, indexed .g.vcf.gz file from bcf/bcf.gz/bcf.bgz/gvcf/gvcf.gz/gvcf.bgz/vcf/vcf.gz/vcf.bgz inputs Merge .g.vcf.gz files using GLnexus resulting in one vcf.gz per project Continue with step 3. of the vcf workflow For details, see here . 
VCF \u00b6 The vcf workflow consists of the following steps: For each project in the sample sheet Create validated, indexed .vcf.gz file from bcf|bcf.gz|bcf.bgz|vcf|vcf.gz|vcf.bgz input Chunk vcf.gz files and for each chunk Normalize Annotate Classify Filter Perform inheritance matching Classify in the context of samples Filter in the context of samples Concatenate chunks resulting in one vcf.gz file per project If cram data is available slice the cram files to only keep relevant reads Create report For details, see here .","title":"Workflow"},{"location":"usage/workflow/#workflow","text":"VIP consists of four workflows depending on the type of input data: fastq, bam/cram, gvcf or vcf. The fastq workflow is an extension of the cram workflow. The cram and gvcf workflows are extensions of the vcf workflow. The vcf workflow produces the pipeline outputs as described here . The following sections provide an overview of the steps of each of these workflows.","title":"Workflow"},{"location":"usage/workflow/#fastq","text":"The fastq workflow consists of the following steps: Parallelize sample sheet per sample and for each sample Quality reporting and preprocessing using fastp Alignment using minimap2 producing a cram file per sample In case of multiple fastq files per sample, concatenate the cram output files Continue with step 3. of the cram workflow For details, see here .","title":"FASTQ"},{"location":"usage/workflow/#cram","text":"The cram workflow consists of the following steps: Parallelize sample sheet per sample and for each sample Create validated, indexed .bam file from bam/cram/sam input If a bed file was provide via the sample sheet: generate coverage metrics using MosDepth Discover short tandem repeats and publish as intermediate result. Using ExpansionHunter for Illumina short read data. 
Using this fork of Straglr for PacBio and Nanopore long read data, this is a fork of this fork(https://github.com/philres/straglr) and is chosen over the original Straglr because of the VCF output that enables VIP to combine it with the SV and SNV data in the VCF workflow. Discover copy number variants for for PacBio and Nanopore long read data using Spectre data and publish as intermediate result. Parallelize cram in chunks consisting of one or more contigs and for each chunk Perform short variant calling with DeepVariant producing a gvcf file per chunk per sample, the gvcfs of the samples in a project are than merged to one vcf per project (using GLnexus . Perform structural variant calling with Manta or cuteSV producing a vcf file per chunk per project. Concatenate short variant calling and structural variant calling vcf files per chunk per sample Continue with step 3. of the vcf workflow For details, see here .","title":"CRAM"},{"location":"usage/workflow/#gvcf","text":"The gvcf workflow consists of the following steps: For each project in the sample sheet Create validated, indexed .g.vcf.gz file from bcf/bcf.gz/bcf.bgz/gvcf/gvcf.gz/gvcf.bgz/vcf/vcf.gz/vcf.bgz inputs Merge .g.vcf.gz files using GLnexus resulting in one vcf.gz per project Continue with step 3. 
of the vcf workflow For details, see here .","title":"gVCF"},{"location":"usage/workflow/#vcf","text":"The vcf workflow consists of the following steps: For each project in the sample sheet Create validated, indexed .vcf.gz file from bcf|bcf.gz|bcf.bgz|vcf|vcf.gz|vcf.bgz input Chunk vcf.gz files and for each chunk Normalize Annotate Classify Filter Perform inheritance matching Classify in the context of samples Filter in the context of samples Concatenate chunks resulting in one vcf.gz file per project If cram data is available slice the cram files to only keep relevant reads Create report For details, see here .","title":"VCF"}]} \ No newline at end of file diff --git a/sitemap.xml b/sitemap.xml index d6e0bf80..a897d310 100644 --- a/sitemap.xml +++ b/sitemap.xml @@ -2,102 +2,102 @@ https://molgenis.github.io/vip/ - 2024-12-02 + 2024-12-04 daily https://molgenis.github.io/vip/about/acknowledgements/ - 2024-12-02 + 2024-12-04 daily https://molgenis.github.io/vip/about/license/ - 2024-12-02 + 2024-12-04 daily https://molgenis.github.io/vip/advanced/annotations/ - 2024-12-02 + 2024-12-04 daily https://molgenis.github.io/vip/advanced/classification_trees/ - 2024-12-02 + 2024-12-04 daily https://molgenis.github.io/vip/advanced/report_templates/ - 2024-12-02 + 2024-12-04 daily https://molgenis.github.io/vip/examples/multi-project/ - 2024-12-02 + 2024-12-04 daily https://molgenis.github.io/vip/examples/nanopore/ - 2024-12-02 + 2024-12-04 daily https://molgenis.github.io/vip/examples/reanalysis/ - 2024-12-02 + 2024-12-04 daily https://molgenis.github.io/vip/get_started/installation/ - 2024-12-02 + 2024-12-04 daily https://molgenis.github.io/vip/get_started/requirements/ - 2024-12-02 + 2024-12-04 daily https://molgenis.github.io/vip/get_started/start_running/ - 2024-12-02 + 2024-12-04 daily https://molgenis.github.io/vip/help/frequently_asked_questions/ - 2024-12-02 + 2024-12-04 daily https://molgenis.github.io/vip/help/issues/ - 2024-12-02 + 2024-12-04 daily 
https://molgenis.github.io/vip/home/key_features/ - 2024-12-02 + 2024-12-04 daily https://molgenis.github.io/vip/usage/command-line-options/ - 2024-12-02 + 2024-12-04 daily https://molgenis.github.io/vip/usage/config/ - 2024-12-02 + 2024-12-04 daily https://molgenis.github.io/vip/usage/input/ - 2024-12-02 + 2024-12-04 daily https://molgenis.github.io/vip/usage/output/ - 2024-12-02 + 2024-12-04 daily https://molgenis.github.io/vip/usage/workflow/ - 2024-12-02 + 2024-12-04 daily \ No newline at end of file diff --git a/sitemap.xml.gz b/sitemap.xml.gz index cfe721b9575d1aeccd97da0c9ffacdbbd7bf575c..a207ce4bd5086eb67b64bca82aadebcd85104085 100644 GIT binary patch delta 385 zcmV-{0e=3k1Fr)IABzYGD-}?Y2OkLZunmcO>l18|E*^jRZ4SE&CZWiu>ap2ZJ7gbw z;_+1d`2JFVsh-=1V@^R(cQ%>jR86YOH|N%MZG;qOSfq(*bmAxLi2E3H`{rxaKJ2XA z0=&1jatzV8wBs1q(RX=ht>37`dw@ePbggl)Hd3b2LDlnOr~?!jeS-KETI7l2~L6Nbgzf@NFSaP2_O z*mu^GvJZ~UO(MDAkM0y_bZ5MF1A^vMurv#d=zN@K;JyyTSjVO81o%VdfyTm-#7ow~ fjdrswAGyYK9ztCIKXa?>H?92*bs;87)(rpvvdy|n delta 385 zcmV-{0e=3k1Fr)IABzYGn7K`n2OkLRunmcO+Y_{rE*^jBZ4SE&CZWiu>ap2ZJ7gbw z;_+0yJpZV_R!{B2F{dD?JDbdMswUOtyL0QhHbROsEYid@I`NZr#C;69eeg_uyr%?owv93qY~r3B%%U!LluExOO0C z>^tj8*$2nwCXrn5M|X-dx-(w80YP&rSegY!bUw~Aa9;;vtm9I40{kKKKx5%Z;w5Y0 fM!VUTk6dFq4Columns: FASTQsequencing_platform enum -illumina +nanopore allowed values: [illumina,nanopore,pacbio_hifi], value must be the same for all project samples