diff --git a/R/main.R b/R/main.R
index c4116d3..1f8404b 100644
--- a/R/main.R
+++ b/R/main.R
@@ -188,10 +188,12 @@ generate_cpsr_report <- function(yaml_fname = NULL) {
),
collapse = ", "
)
- pcgrr::log4r_info(paste0(
- "Variants were found in the following cancer ",
- "predisposition genes: ", gene_hits
- ))
+ if(nchar(gene_hits) > 0){
+ pcgrr::log4r_info(paste0(
+ "Variants were found in the following cancer ",
+ "predisposition genes: ", gene_hits
+ ))
+ }
if (cps_report$content$snv_indel$v_stat_sf$n > 0) {
sf_hits <- paste(
@@ -212,7 +214,7 @@ generate_cpsr_report <- function(yaml_fname = NULL) {
)
pcgrr::log4r_info(
- "Generating hyperlinked annotations for output data frames"
+ "Generating hyperlinked annotations for output data tables"
)
for (c in c("sf", "cpg_non_sf", "gwas", "bm")) {
if (NROW(
diff --git a/inst/templates/quarto/cpsr_gwas.qmd b/inst/templates/quarto/cpsr_gwas.qmd
index f5a380f..1a9b25a 100644
--- a/inst/templates/quarto/cpsr_gwas.qmd
+++ b/inst/templates/quarto/cpsr_gwas.qmd
@@ -2,7 +2,9 @@
-```{r prepare_gwas_data, echo=F, results='asis'}
+```{r prepare_gwas_data}
+#| echo: false
+#| results: asis
show_gwas_filters <- F
missing_gwas_items <- T
@@ -13,7 +15,10 @@ if (NROW(cps_report[["content"]][["snv_indel"]]$callset$variant_display$gwas) >
```
-```{r gwas_cancer, echo=F, results = 'asis', eval = show_gwas_filters}
+```{r gwas_cancer}
+#| echo: false
+#| results: asis
+#| eval: !expr show_gwas_filters
cat("A total of N = ", NROW(cps_report[["content"]][["snv_indel"]]$callset$variant_display$gwas), " other germline variant(s) in the query VCF are associated with cancer phenotypes, as found through [genome-wide association studies](https://www.ebi.ac.uk/gwas/):")
cat("\n")
@@ -56,7 +61,11 @@ DT::datatable(variants_gwas_cancer,
# htmltools::br()
```
-```{r gwas_cancer_missing, echo=F, results = 'asis', eval = missing_gwas_items}
+```{r gwas_cancer_missing}
+#| echo: false
+#| results: asis
+#| eval: !expr missing_gwas_items
+
cat("NO GWAS tag SNPs were found beyond the variants reported in the targeted cancer predisposition genes ", sep = "\n")
cat("\n")
```
diff --git a/pkgdown/index.md b/pkgdown/index.md
index e76e8c9..95d3ee7 100755
--- a/pkgdown/index.md
+++ b/pkgdown/index.md
@@ -10,7 +10,7 @@ The *Cancer Predisposition Sequencing Reporter (CPSR)* is a computational workfl
1) Flexible **selection of cancer predisposition genes** subject to analysis and reporting - through the use of virtual gene panels
2) **Variant classification** (*Pathogenic* to _Benign_) through a dedicated implementation of [ACMG/AMP guidelines](https://pubmed.ncbi.nlm.nih.gov/25741868/)
-3) **Detection of germline biomarkers** - for prognosis, diagnosis, or drug sensitivity/resistance
+3) **Detection of germline biomarkers** - for prognosis, diagnosis, or drug sensitivity/resistance in cancer
4) Reporting of **secondary/incidental findings** ([ACMG recommendations](https://pubmed.ncbi.nlm.nih.gov/37347242/))
5) **Interactive HTML output report** with detailed variant information, gene annotations, and external links to relevant databases
@@ -25,9 +25,15 @@ Snapshots of sections in the [quarto](https://quarto.org)-based cancer predispos
### News
+
+* *September 2024*: **2.1.0 release**
+ - data bundle upgrade
+ - re-calibration of classification thresholds
+ - [CHANGELOG](https://sigven.github.io/cpsr/articles/CHANGELOG.html)
+
* *August 2024*: **2.0.3 release**
- - patch with bug fix for copying in quarto templates ([pr62](https://github.com/sigven/cpsr/pull/62))
- - [CHANGELOG](https://sigven.github.io/cpsr/articles/CHANGELOG.html)
+ - patch with bug fix for copying in quarto templates ([pr62](https://github.com/sigven/cpsr/pull/62))
+ - [CHANGELOG](https://sigven.github.io/cpsr/articles/CHANGELOG.html)
* *July 2024*: **2.0.1 release**
- patch with bug fix for mitochondrial input variants ([pr245](https://github.com/sigven/pcgr/pull/245))
@@ -43,16 +49,10 @@ Snapshots of sections in the [quarto](https://quarto.org)-based cancer predispos
* *November 2022*: **1.0.1 release**
* Added CPSR logo (designed by [Hal Nakken](https://halvetica.net))
-* *February 2022*: **1.0.0 release**
- * Complete restructure of code and Conda installation routines, contributed largely by the great [@pdiakumis](https://github.com/pdiakumis)
- * Updated bundle (ClinVar, CancerMine, UniprotKB, PanelApp, CIViC, GWAS catalog)
- * Software upgrade (VEP, R/BioConductor)
- * New documentation site (https://sigven.github.io/cpsr)
-
### Example report
-[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.12734384.svg)](https://doi.org/10.5281/zenodo.12734384)
+[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.13855978.svg)](https://doi.org/10.5281/zenodo.13855978)
### Getting started
diff --git a/vignettes/CHANGELOG.Rmd b/vignettes/CHANGELOG.Rmd
index e5bf159..1e8586e 100644
--- a/vignettes/CHANGELOG.Rmd
+++ b/vignettes/CHANGELOG.Rmd
@@ -3,6 +3,17 @@ title: "Changelog"
output: rmarkdown::html_document
---
+## v2.1.0
+
+- Date: **2024-09-29**
+- Major data updates
+ - ClinVar (2024-09)
+ - dbNSFP (v4.8)
+ - CIViC (2024-09-18)
+- Adjusted thresholds for CPSR variant classification based on calibration against ClinVar (Sept 2024 release)
+- Added link to chosen virtual panel in HTML report
+- Created new column `ALTERATION` in variant tables of HTML report, a joint annotation of `HGVSp` and `HGVSc`
+
## v2.0.3
- Date: **2024-08-01**
diff --git a/vignettes/annotation_resources.Rmd b/vignettes/annotation_resources.Rmd
index 8eba8c0..6f3d311 100644
--- a/vignettes/annotation_resources.Rmd
+++ b/vignettes/annotation_resources.Rmd
@@ -7,7 +7,7 @@ output: rmarkdown::html_document
* [VEP v112](http://www.ensembl.org/info/docs/tools/vep/index.html) - Variant Effect Predictor ([GENCODE v46](https://www.gencodegenes.org/human/) as gene reference database (v19 for grch37))
### *Insilico* predictions of effect of coding variants
- * [dBNSFP](https://sites.google.com/site/jpopgen/dbNSFP) - database of non-synonymous functional predictions (v4.5, November 2023)
+ * [dBNSFP](https://sites.google.com/site/jpopgen/dbNSFP) - database of non-synonymous functional predictions (v4.8, June 2024)
### Variant frequency databases
* [gnomAD](http://exac.broadinstitute.org/) - germline variant frequencies exome-wide (r2.1, October 2018)
@@ -15,19 +15,19 @@ output: rmarkdown::html_document
* [Cancer Hotspots](http://cancerhotspots.org) - a resource for statistically significant mutations in cancer (v2, 2017)
### Variant databases of clinical utility
- * [ClinVar](http://www.ncbi.nlm.nih.gov/clinvar/) - database of clinically related variants (June 2024)
- * [CIViC](https://civicdb.org) - clinical interpretations of variants in cancer (June 21st 2024)
+ * [ClinVar](http://www.ncbi.nlm.nih.gov/clinvar/) - database of clinically related variants (September 2024)
+ * [CIViC](https://civicdb.org) - clinical interpretations of variants in cancer (September 18th 2024)
### Protein domains/functional features
- * [UniProt/SwissProt KnowledgeBase](http://www.uniprot.org) - resource on protein sequence and functional information (2024_03)
- * [Pfam](http://pfam.xfam.org) - database of protein families and domains (v35.0, November 2021)
+ * [UniProt/SwissProt KnowledgeBase](http://www.uniprot.org) - resource on protein sequence and functional information (2024_04)
+ * [Pfam](http://pfam.xfam.org) - database of protein families and domains (v37.0)
### Cancer gene knowledge bases
* [CancerMine](http://bionlp.bcgsc.ca/cancermine/) - Literature-mined database of tumor suppressor genes/proto-oncogenes (v50, March 2023)
- * [Genomics England PanelApp](https://panelapp.genomicsengland.co.uk) - cancer phenotype panels as of June 2024
+ * [Genomics England PanelApp](https://panelapp.genomicsengland.co.uk) - cancer phenotype panels as of August 2024
* [Cancer Gene Census](https://www.sanger.ac.uk/data/cancer-gene-census/) - genes implicated with cancer susceptibility (v100)
### Phenotype ontologies
- * [UMLS/MedGen](https://www.ncbi.nlm.nih.gov/medgen/) - May 2024
- * [Disease Ontology](https://disease-ontology.org/) - April 2024
- * [Experimental Factor Ontology](https://github.com/EBISPOT/efo) - v3.66.0
+ * [UMLS/MedGen](https://www.ncbi.nlm.nih.gov/medgen/) - August 2024
+ * [Disease Ontology](https://disease-ontology.org/) - August 2024
+ * [Experimental Factor Ontology](https://github.com/EBISPOT/efo) - v3.69.0
diff --git a/vignettes/output.Rmd b/vignettes/output.Rmd
index 94eb7e8..175d512 100644
--- a/vignettes/output.Rmd
+++ b/vignettes/output.Rmd
@@ -65,7 +65,7 @@ A VCF file containing annotated, germline calls (single nucleotide variants and
| Tag | Description |
|-----|-------------|
-| `CSQ` | Complete consequence annotations from VEP. Format (separated by a `|`): `Allele`, `Consequence`, `IMPACT`, `SYMBOL`, `Gene`, `Feature_type`, `Feature`, `BIOTYPE`, `EXON`, `INTRON`, `HGVSc`, `HGVSp`, `cDNA_position`, `CDS_position`, `Protein_position`, `Amino_acids`, `Codons`, `Existing_variation`, `ALLELE_NUM`, `DISTANCE`, `STRAND`, `FLAGS`, `PICK`, `VARIANT_CLASS`, `SYMBOL_SOURCE`, `HGNC_ID`, `CANONICAL`, `MANE_SELECT`, `MANE_PLUS_CLINICAL`, `TSL`, `APPRIS`, `CCDS`, `ENSP`, `SWISSPROT`, `TREMBL`, `UNIPARC`, `RefSeq`, `DOMAINS`, `HGVS_OFFSET`, `gnomADe_AF`, `gnomADe_AFR_AF`, `gnomADe_AMR_AF`, `gnomADe_ASJ_AF`, `gnomADe_EAS_AF`, `gnomADe_FIN_AF`, `gnomADe_NFE_AF`, `gnomADe_OTH_AF`, `gnomADe_SAS_AF`, `CLIN_SIG`, `SOMATIC`, `PHENO`, `CHECK_REF`, `MOTIF_NAME`, `MOTIF_POS`, `HIGH_INF_POS`, `MOTIF_SCORE_CHANGE`, `TRANSCRIPTION_FACTORS`, `NearestExonJB`, `LoF`, `LoF_filter`, `LoF_flags`, `LoF_info` |
+| `CSQ` | Complete consequence annotations from VEP. Format (separated by a `|`): `Allele`, `Consequence`, `IMPACT`, `SYMBOL`, `Gene`, `Feature_type`, `Feature`, `BIOTYPE`, `EXON`, `INTRON`, `HGVSc`, `HGVSp`, `cDNA_position`, `CDS_position`, `Protein_position`, `Amino_acids`, `Codons`, `Existing_variation`, `ALLELE_NUM`, `DISTANCE`, `STRAND`, `FLAGS`, `PICK`, `VARIANT_CLASS`, `SYMBOL_SOURCE`, `HGNC_ID`, `CANONICAL`, `MANE_SELECT`, `MANE_PLUS_CLINICAL`, `TSL`, `APPRIS`, `CCDS`, `ENSP`, `SWISSPROT`, `TREMBL`, `UNIPARC`, `RefSeq`, `DOMAINS`, `HGVS_OFFSET`, `gnomADe_AF`, `gnomADe_AFR_AF`, `gnomADe_AMR_AF`, `gnomADe_ASJ_AF`, `gnomADe_EAS_AF`, `gnomADe_FIN_AF`, `gnomADe_NFE_AF`, `gnomADe_OTH_AF`, `gnomADe_SAS_AF`, `CLIN_SIG`, `SOMATIC`, `PHENO`, `CHECK_REF`, `MOTIF_NAME`, `MOTIF_POS`, `HIGH_INF_POS`, `MOTIF_SCORE_CHANGE`, `TRANSCRIPTION_FACTORS`, `NearestExonJB` |
| `Consequence` | Impact modifier for the consequence type (picked by VEP's `--flag_pick_allele` option) |
| `Gene` | Ensembl stable ID of affected gene (picked by VEP's `--flag_pick_allele` option) |
| `Feature_type` | Type of feature. Currently one of `Transcript`, `RegulatoryFeature`, `MotifFeature` (picked by VEP's `--flag_pick_allele` option) |
@@ -74,6 +74,7 @@ A VCF file containing annotated, germline calls (single nucleotide variants and
| `CDS_position` | Relative position of base pair in coding sequence (picked by VEP's `--flag_pick_allele` option) |
| `CDS_RELATIVE_POSITION` | Ratio of variant coding position to length of coding sequence |
| `CDS_CHANGE` | Coding, transcript-specific sequence annotation (picked by VEP's `--flag_pick_allele` option) |
+| `ALTERATION` | HGVSp/HGVSc identifier |
| `AMINO_ACID_START` | Protein position indicating absolute start of amino acid altered (fetched from `Protein_position`) |
| `AMINO_ACID_END` | Protein position indicating absolute end of amino acid altered (fetched from `Protein_position`) |
| `Protein_position`| Relative position of amino acid in protein (picked by VEP's `--flag_pick_allele` option) |
@@ -90,6 +91,7 @@ A VCF file containing annotated, germline calls (single nucleotide variants and
| `TREMBL`| Best match UniProtKB/TrEMBL accession of protein product (picked by VEP's `--flag_pick_allele` option) |
| `UNIPARC`| Best match UniParc accession of protein product (picked by VEP's `--flag_pick_allele` option) |
| `HGVSc`| The HGVS coding sequence name (picked by VEP's `--flag_pick_allele` option) |
+| `HGVSc_RefSeq`| The HGVSc coding sequence name using RefSeq transcript identifiers (MANE select) - picked by VEP's `--flag_pick_allele` option |
| `HGVSp`| The HGVS protein sequence name (picked by VEP's `--flag_pick_allele` option) |
| `HGVSp_short`| The HGVS protein sequence name, short version (picked by VEP's `--flag_pick_allele` option) |
| `HGVS_OFFSET`| Indicates by how many bases the HGVS notations for this variant have been shifted (picked by VEP's `--flag_pick_allele` option) |
@@ -126,7 +128,7 @@ A VCF file containing annotated, germline calls (single nucleotide variants and
| `LOSS_OF_FUNCTION` | Loss-of-function variant |
| `LOF_FILTER` | Loss-of-function filter |
| `SPLICE_DONOR_RELEVANT` | Logical indicating if variant is located at a particular location near the splice donor site (`+3A/G`, `+4A` or `+5G`) |
-| `BIOMARKER_MATCH` | Variant matches with germline biomarker evidence in CIViC/CGI. Format: \|\|::::\|. Multiple evidence items are separated by '&'. Example: civic|174|EID445:Colon/Rectum:Sensitivity/Response:D:Predictive:Germline&EID446:Colon/Rectum:Sensitivity/Response:D:Predictive:Germline|by_gene_mut. Matching type can be any of `by_genomic_coord`, `by_hgvsp_principal`, `by_hgvsc_principal`, `by_hgvsp_nonprincipal`, `by_hgvsc_nonprincipal`, `by_codon_principal`, `by_exon_mut_principal`, `by_gene_mut_lof`, `by_gene_mut` |
+| `BIOMARKER_MATCH` | Variant matches with germline biomarker evidence in CIViC/CGI. Format: `||::::|`. Multiple evidence items are separated by '&'. Example: civic|174|EID445:Colon/Rectum:Sensitivity/Response:D:Predictive:Germline&EID446:Colon/Rectum:Sensitivity/Response:D:Predictive:Germline|by_gene_mut. Matching type can be any of `by_genomic_coord`, `by_hgvsp_principal`, `by_hgvsc_principal`, `by_hgvsp_nonprincipal`, `by_hgvsc_nonprincipal`, `by_codon_principal`, `by_exon_mut_principal`, `by_gene_mut_lof`, `by_gene_mut` |
| `REGULATORY_ANNOTATION` | Comma-separated list of all variant annotations of `Feature_type`, `RegulatoryFeature`, and `MotifFeature`. Format (separated by a `|`): ``, ``, ``, ``, ``, ``, ``, ``, `` |
@@ -137,8 +139,8 @@ A VCF file containing annotated, germline calls (single nucleotide variants and
|-----|-------------|
| `ENTREZGENE` | [Entrez](http://www.ncbi.nlm.nih.gov/gene) gene identifier |
| `APPRIS` | Principal isoform flags according to the [APPRIS principal isoform database](http://appris.bioinfo.cnio.es/#/downloads) |
-| `MANE_SELECT` | Indicating if the transcript is the MANE Select transcript for the gene (picked by VEP's `--flag_pick_allele_gene` option) |
-| `MANE_PLUS_CLINICAL` | Indicating if the transcript is the MANE Plus Clinical transcript for the gene (picked by VEP's `--flag_pick_allele_gene` option) |
+| `MANE_SELECT` | Indicating if the transcript is the MANE Select for the gene (picked by VEP's `--flag_pick_allele_gene` option) |
+| `MANE_PLUS_CLINICAL` | Indicating if the transcript is MANE Plus Clinical, as required for clinical variant reporting (picked by VEP's `--flag_pick_allele_gene` option) |
| `UNIPROT_ID` | [UniProt](http://www.uniprot.org) identifier |
| `UNIPROT_ACC` | [UniProt](http://www.uniprot.org) accession(s) |
| `ENSEMBL_GENE_ID` | Ensembl gene identifier for VEP's picked transcript (*ENSGXXXXXXX*) |
@@ -146,8 +148,8 @@ A VCF file containing annotated, germline calls (single nucleotide variants and
| `ENSEMBL_PROTEIN_ID` | Ensembl corresponding protein identifier for VEP's picked transcript |
| `REFSEQ_TRANSCRIPT_ID` | Corresponding RefSeq transcript(s) identifier for VEP's picked transcript (*NM_XXXXX*) |
| `REFSEQ_PROTEIN_ID` | RefSeq protein/peptide identifier for VEP's picked transcript (*NP_XXXXXX*) |
-| `TRANSCRIPT_MANE_SELECT` | MANE select transcript identifer: one high-quality representative transcript per protein-coding gene that is well-supported by experimental data and represents the biology of the gene |
-| `TRANSCRIPT_MANE_PLUS_CLINICAL` | transcripts chosen to supplement MANE Select when needed for clinical variant reporting |
+| `MANE_SELECT2` | MANE select transcript identifier: one high-quality representative transcript per protein-coding gene that is well-supported by experimental data and represents the biology of the gene - provided through BioMart |
+| `MANE_PLUS_CLINICAL2` | transcripts chosen to supplement MANE Select when needed for clinical variant reporting - provided through BioMart |
| `GENCODE_TAG` | tag for GENCODE transcript (basic etc) |
| `GENCODE_TRANSCRIPT_TYPE` | type of transcript (protein-coding etc.) |
| `TSG` | Indicates whether gene is predicted as a tumor suppressor gene, from Network of Cancer Genes (NCG) & the CancerMine text-mining resource |
@@ -183,7 +185,12 @@ A VCF file containing annotated, germline calls (single nucleotide variants and
| `MUTATION_HOTSPOT_MATCH` | Type of hotspot match (by_hgvsp_principal, by_hgvsc_principal, by_hgvsp_nonprincipal, by_hgvsc_nonprincipal, by_codon_principal, by_codon_nonprincipal) |
| `MUTATION_HOTSPOT_CANCERTYPE` | hotspot-associated cancer types (from cancerhotspots.org) |
| `PFAM_DOMAIN` | Pfam domain identifier (from VEP) |
-| `EFFECT_PREDICTIONS` | All predictions of effect of variant on protein function and pre-mRNA splicing from [database of non-synonymous functional predictions - dbNSFP v4.2](https://sites.google.com/site/jpopgen/dbNSFP). Predicted effects are provided by different sources/algorithms (separated by `&`), `T` = Tolerated, `N` = Neutral, `D` = Damaging: 1.[SIFT](https://sift.bii.a-star.edu.sg/), 2.[MutationTaster](http://www.mutationtaster.org/) (data release Nov 2015), 3.[MutationAssessor](http://mutationassessor.org/) (release 3), 4.[FATHMM](http://fathmm.biocompute.org.uk) (v2.3), 5.[PROVEAN](http://provean.jcvi.org/index.php) (v1.1 Jan 2015), 6.[FATHMM\_MKL](http://fathmm.biocompute.org.uk/fathmmMKL.htm), 7.[PRIMATEAI](https://www.nature.com/articles/s41588-018-0167-z), 8.[DEOGEN2](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5570203/), 9.[DBNSFP\_CONSENSUS\_RNN](https://www.biorxiv.org/content/10.1101/2021.04.09.438706v1) (Ensembl/consensus prediction, based on deep learning), 10.[SPLICE\_SITE\_EFFECT\_ADA](http://nar.oxfordjournals.org/content/42/22/13534) (Ensembl/consensus prediction of splice-altering SNVs, based on adaptive boosting), 11.[SPLICE\_SITE\_EFFECT\_RF](http://nar.oxfordjournals.org/content/42/22/13534) (Ensembl/consensus prediction of splice-altering SNVs, based on random forest), 12.[M-CAP](http://bejerano.stanford.edu/MCAP), 13.[MutPred](http://mutpred.mutdb.org), 14.[GERP](http://mendel.stanford.edu/SidowLab/downloads/gerp/), 15.[BayesDel](https://doi.org/10.1002/humu.23158), 16.[LIST-S2](https://doi.org/10.1093/nar/gkaa288), 17.[ALoFT](https://www.nature.com/articles/s41467-017-00443-5) |
+| `EFFECT_PREDICTIONS` | All predictions of effect of variant on protein function and pre-mRNA splicing from [database of non-synonymous functional predictions - dbNSFP v4.8](https://sites.google.com/site/jpopgen/dbNSFP). Predicted effects are provided by different sources/algorithms (separated by `&`), `T` = Tolerated, `N` = Neutral, `D` = Damaging: 1. [SIFT](https://sift.bii.a-star.edu.sg/), 2. [MutationTaster](http://www.mutationtaster.org/) (data release Nov 2015), 3. [MutationAssessor](http://mutationassessor.org/) (release 3), 4. [FATHMM](http://fathmm.biocompute.org.uk) (v2.3), 5. [PROVEAN](http://provean.jcvi.org/index.php) (v1.1 Jan 2015), 6. [FATHMM\_MKL](http://fathmm.biocompute.org.uk/fathmmMKL.htm), 7. [PRIMATEAI](https://www.nature.com/articles/s41588-018-0167-z), 8. [DEOGEN2](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5570203/), 9. [DBNSFP\_CONSENSUS\_RNN](https://www.biorxiv.org/content/10.1101/2021.04.09.438706v1) (Ensembl/consensus prediction, based on deep learning), 10. [SPLICE\_SITE\_EFFECT\_ADA](http://nar.oxfordjournals.org/content/42/22/13534) (Ensembl/consensus prediction of splice-altering SNVs, based on adaptive boosting), 11. [SPLICE\_SITE\_EFFECT\_RF](http://nar.oxfordjournals.org/content/42/22/13534) (Ensembl/consensus prediction of splice-altering SNVs, based on random forest), 12. [M-CAP](http://bejerano.stanford.edu/MCAP),
+13. [MutPred](http://mutpred.mutdb.org), 14. [GERP](http://mendel.stanford.edu/SidowLab/downloads/gerp/), 15. [BayesDel](https://doi.org/10.1002/humu.23158), 16. [LIST-S2](https://doi.org/10.1093/nar/gkaa288), 17. [ALoFT](https://www.nature.com/articles/s41467-017-00443-5),
+18. [AlphaMissense](https://console.cloud.google.com/storage/browser/dm_alphamissense),
+19. [ESM1b](https://huggingface.co/spaces/ntranoslab/esm_variants/tree/main),
+20. [PHACTboost](https://github.com/CompGenomeLab/PHACTboost),
+21. [MutFormer](https://github.com/WGLab/mutformer) |
| `DBNSFP_BAYESDEL_ADDAF` | predicted effect from BayesDel (dbNSFP) |
| `DBNSFP_LIST_S2` | predicted effect from LIST-S2 (dbNSFP) |
| `DBNSFP_SIFT` | predicted effect from SIFT (dbNSFP) |
@@ -196,6 +203,10 @@ A VCF file containing annotated, germline calls (single nucleotide variants and
| `DBNSFP_FATHMM` | predicted effect from FATHMM (dbNSFP) |
| `DBNSFP_PRIMATEAI` | predicted effect from PRIMATEAI (dbNSFP) |
| `DBNSFP_DEOGEN2` | predicted effect from DEOGEN2 (dbNSFP) |
+| `DBNSFP_PHACTBOOST` | predicted effect from PHACTboost (dbNSFP) |
+| `DBNSFP_ALPHA_MISSENSE` | predicted effect from AlphaMissense (dbNSFP) |
+| `DBNSFP_MUTFORMER` | predicted effect from MutFormer (dbNSFP) |
+| `DBNSFP_ESM1B` | predicted effect from ESM1b (dbNSFP) |
| `DBNSFP_GERP` | evolutionary constraint measure from GERP (dbNSFP) |
| `DBNSFP_FATHMM_MKL` | predicted effect from FATHMM-mkl (dbNSFP) |
| `DBNSFP_META_RNN` | predicted effect from ensemble prediction (deep learning - dbNSFP) |
@@ -336,48 +347,51 @@ The following variables are included in the tiered TSV file (VCF tags in the que
| 18. `ONCOGENE` | Gene is predicted as an oncogene according to Network of Cancer Genes (NCG)/Cancer Gene Census (CGC) and CancerMine |
| 19. `TUMOR_SUPPRESSOR` | Gene is predicted as a tumor suppressor gene according to Network of Cancer Genes (NCG)/Cancer Gene Census (CGC) and CancerMine |
| 20. `CONSEQUENCE` | Variant consequence |
-| 21. `PROTEIN_CHANGE` | Protein change - one letter abbreviation (HGVSp) |
-| 22. `PFAM_DOMAIN_NAME` | Protein domain name (Pfam) |
-| 23. `HGVSp` | The HGVS protein sequence name |
-| 24. `HGVSc` | The HGVS coding sequence name |
-| 25. `CDS_CHANGE` | Coding, transcript-specific sequence annotation |
-| 26. `LAST_EXON` | Last exon in gene |
-| 27. `EXON`| Exon of variant/total number of exons in transcript (from VEP) |
-| 28. `EXON_AFFECTED` | Transcript exon of variant (from VEP) |
-| 29. `EXON_POSITION` | Relative position of exon variant to nearest intron/exon junction (NearestExonJB plugin) |
-| 30. `INTRON_POSITION` | Relative position of intron variant to nearest intron/exon junction (NearestExonJB plugin) |
-| 31. `VEP_ALL_CSQ` | All VEP transcript block consequences |
-| 32. `CANCER_PHENOTYPE` | For variants with a ClinVar classification, indication of cancer-associated disease/phenotype (1) or not (0) |
-| 33. `MUTATION_HOTSPOT` | Cancer mutation hotspot (cancerhotspots.org) |
-| 34. `RMSK_HIT` | RepeatMasker hit |
-| 35. `EFFECT_PREDICTIONS` | Functional effect predictions from multiple algorithms (dbNSFP) |
-| 36. `LOSS_OF_FUNCTION` | Loss-of-function variant |
-| 37. `LOF_FILTER` | Loss-of-function filter |
-| 38. `NULL_VARIANT` | Frameshift or stop-gain variant |
-| 39. `DBMTS` | variant with potential effect on microRNA target sites (dbMTS). Format: `|||`. _Target prediction algorithms_ indicate support by different algorithms (separated by '&'), `TS` = TargetScan, `M` = miRanda, `R` = RNAhybrid. *Gain_loss_consensus* indicate whether the variant was predicted to disrupt a binding site (`L` = Loss), or create a new target site (`G` = gain) by the different algorithms |
-| 40. `REGULATORY_ANNOTATION` | Overlap of variant with regulatory elements (VEP) |
-| 41. `TF_BINDING_SITE_VARIANT` | Indicates whether a variant overlaps a critical/non-critical position of a transcription factor binding site (TFBS) - as provided by VEP's--regulatory option ('Overlap: non-critical motif position' or 'Overlap: critical motif position') |
-| 42. `TF_BINDING_SITE_VARIANT_INFO` | Comma-separated list of transcription factor binding sites affected by variant. Format per factor: `||||`. *HIGH_INF_POS* indicates whether the variant overlapped a critical motif position (`Y`), or non-critical motif position (`N`) |
-| 43. `GERP_SCORE` | Genomic conservation score (GERP) |
-| 44. `DBSNP_RSID` | dbSNP identifier (rsid) |
-| 45. `CLINVAR_CLASSIFICATION` | clinical significance of ClinVar-recorded variant |
-| 46. `CLINVAR_MSID` | measureset identifier of ClinVar variant |
-| 47. `CLINVAR_VARIANT_ORIGIN` | variant origin (somatic/germline) of ClinVar variant |
-| 48. `CLINVAR_CONFLICTED` | indicator of conflicting interpretations |
-| 49. `CLINVAR_PHENOTYPE` | associated phenotype(s) for ClinVar variant |
-| 50. `CLINVAR_REVIEW_STATUS_STARS` | Review confidence - number of gold stars |
-| 51. `N_INSILICO_CALLED` | Number of algorithms with effect prediction (damaging/tolerated) from dbNSFP |
-| 52. `N_INSILICO_DAMAGING` | Number of algorithms with damaging prediction from dbNSFP |
-| 53. `N_INSILICO_TOLERATED` | Number of algorithms with tolerated prediction from dbNSFP |
-| 54. `N_INSILICO_SPLICING_NEUTRAL` | Number of algorithms with splicing neutral prediction from dbscSNV |
-| 55. `N_INSILICO_SPLICING_AFFECTED` | Number of algorithms with splicing affected prediction from dbscSNV |
-| 56. `gnomADe_AF` | Global MAF in gnomAD (exome samples) |
-| 57. `FINAL_CLASSIFICATION` | Final variant classification, using either `CLINVAR_CLASSIFICATION` if variant is ClinVar-classified, or `CPSR_CLASSIFICATION` for novel variants |
-| 58. `CPSR_CLASSIFICATION` | Variant clinical significance by CPSR's classification algorithm (P/LP/VUS/LB/B) |
-| 59. `CPSR_PATHOGENICITY_SCORE` | Aggregated pathogenicity score by CPSR's algorithm |
-| 60. `CPSR_CLASSIFICATION_CODE` | Combination of CPSR classification codes assigned to the variant (ACMG) |
-| 61. `CPSR_CLASSIFICATION_DOC` | Descriptions of CPSR classification codes assigned to the variant (ACMG) |
-| 62. ` | Population specific MAF in gnomAD control (non-cancer, population configured by user) |
+| 21. `ALTERATION` | Molecular alteration (HGVSp or HGVSc depending on consequence) |
+| 22. `PROTEIN_CHANGE` | Protein change - one letter abbreviation (HGVSp) |
+| 23. `PFAM_DOMAIN` | Protein domain (Pfam identifier) |
+| 24. `PFAM_DOMAIN_NAME` | Protein domain name (Pfam) |
+| 25. `HGVSp` | The HGVS protein sequence name |
+| 26. `HGVSc` | The HGVS coding sequence name |
+| 27. `HGVSc_RefSeq` | The HGVS coding sequence name (RefSeq - MANE Select) |
+| 28. `CDS_CHANGE` | Coding, transcript-specific sequence annotation |
+| 29. `LAST_EXON` | Last exon in gene |
+| 30. `EXON`| Exon of variant/total number of exons in transcript (from VEP) |
+| 31. `EXON_AFFECTED` | Transcript exon of variant (from VEP) |
+| 32. `EXON_POSITION` | Relative position of exon variant to nearest intron/exon junction (NearestExonJB plugin) |
+| 33. `INTRON_POSITION` | Relative position of intron variant to nearest intron/exon junction (NearestExonJB plugin) |
+| 34. `VEP_ALL_CSQ` | All VEP transcript block consequences |
+| 35. `CANCER_PHENOTYPE` | For variants with a ClinVar classification, indication of cancer-associated disease/phenotype (1) or not (0) |
+| 36. `MUTATION_HOTSPOT` | Cancer mutation hotspot (cancerhotspots.org) |
+| 37. `RMSK_HIT` | RepeatMasker hit |
+| 38. `EFFECT_PREDICTIONS` | Functional effect predictions from multiple algorithms (dbNSFP) |
+| 39. `LOSS_OF_FUNCTION` | Loss-of-function variant |
+| 40. `LOF_FILTER` | Loss-of-function filter |
+| 41. `NULL_VARIANT` | Frameshift or stop-gain variant |
+| 42. `DBMTS` | variant with potential effect on microRNA target sites (dbMTS). Format: `|||`. _Target prediction algorithms_ indicate support by different algorithms (separated by '&'), `TS` = TargetScan, `M` = miRanda, `R` = RNAhybrid. *Gain_loss_consensus* indicate whether the variant was predicted to disrupt a binding site (`L` = Loss), or create a new target site (`G` = gain) by the different algorithms |
+| 43. `REGULATORY_ANNOTATION` | Overlap of variant with regulatory elements (VEP) |
+| 44. `TF_BINDING_SITE_VARIANT` | Indicates whether a variant overlaps a critical/non-critical position of a transcription factor binding site (TFBS) - as provided by VEP's `--regulatory` option ('Overlap: non-critical motif position' or 'Overlap: critical motif position') |
+| 45. `TF_BINDING_SITE_VARIANT_INFO` | Comma-separated list of transcription factor binding sites affected by variant. Format per factor: `||||`. *HIGH_INF_POS* indicates whether the variant overlapped a critical motif position (`Y`), or non-critical motif position (`N`) |
+| 46. `GERP_SCORE` | Genomic conservation score (GERP) |
+| 47. `DBSNP_RSID` | dbSNP identifier (rsid) |
+| 48. `CLINVAR_CLASSIFICATION` | clinical significance of ClinVar-recorded variant |
+| 49. `CLINVAR_MSID` | measureset identifier of ClinVar variant |
+| 50. `CLINVAR_VARIANT_ORIGIN` | variant origin (somatic/germline) of ClinVar variant |
+| 51. `CLINVAR_CONFLICTED` | indicator of conflicting interpretations |
+| 52. `CLINVAR_PHENOTYPE` | associated phenotype(s) for ClinVar variant |
+| 53. `CLINVAR_REVIEW_STATUS_STARS` | Review confidence - number of gold stars |
+| 54. `N_INSILICO_CALLED` | Number of algorithms with effect prediction (damaging/tolerated) from dbNSFP |
+| 55. `N_INSILICO_DAMAGING` | Number of algorithms with damaging prediction from dbNSFP |
+| 56. `N_INSILICO_TOLERATED` | Number of algorithms with tolerated prediction from dbNSFP |
+| 57. `N_INSILICO_SPLICING_NEUTRAL` | Number of algorithms with splicing neutral prediction from dbscSNV |
+| 58. `N_INSILICO_SPLICING_AFFECTED` | Number of algorithms with splicing affected prediction from dbscSNV |
+| 59. `gnomADe_AF` | Global MAF in gnomAD (exome samples) |
+| 60. `FINAL_CLASSIFICATION` | Final variant classification, using either `CLINVAR_CLASSIFICATION` if variant is ClinVar-classified, or `CPSR_CLASSIFICATION` for novel variants |
+| 61. `CPSR_CLASSIFICATION` | Variant clinical significance by CPSR's classification algorithm (P/LP/VUS/LB/B) |
+| 62. `CPSR_PATHOGENICITY_SCORE` | Aggregated pathogenicity score by CPSR's algorithm |
+| 63. `CPSR_CLASSIFICATION_CODE` | Combination of CPSR classification codes assigned to the variant (ACMG) |
+| 64. `CPSR_CLASSIFICATION_DOC` | Descriptions of CPSR classification codes assigned to the variant (ACMG) |
+| 65. ` | Population specific MAF in gnomAD control (non-cancer, population configured by user) |
**NOTE**: The user has the possibility to append the TSV file with data from other INFO tags in the input VCF (i.e. using the *--retained_info_tags* option)
diff --git a/vignettes/running.Rmd b/vignettes/running.Rmd
index c9b2e88..9bb5a3b 100644
--- a/vignettes/running.Rmd
+++ b/vignettes/running.Rmd
@@ -169,7 +169,7 @@ VEP options:
--vep_gencode_basic Consider basic GENCODE transcript set only with Variant Effect Predictor (VEP) (option '--gencode_basic' in VEP).
--vep_pick_order VEP_PICK_ORDER
Comma-separated string of ordered transcript properties for primary variant pick
- ( option '--pick_order' in VEP), default: mane_select,mane_plus_clinical,canonical,appris,tsl,biotype,ccds,rank,length
+ ( option '--pick_order' in VEP), default: mane_select,mane_plus_clinical,canonical,biotype,ccds,rank,tsl,appris,length
--vep_no_intergenic Skip intergenic variants during processing (option '--no_intergenic' in VEP), default: False
vcfanno options:
@@ -181,6 +181,7 @@ Other options:
You can force the overwrite of existing result files by using this flag, default: False
--version show program's version number and exit
--no_reporting Run functional variant annotation on VCF through VEP/vcfanno, omit classification/report generation (STEP 4), default: False
+ --no_html Do not generate HTML report (default: False)
--retained_info_tags RETAINED_INFO_TAGS
Comma-separated string of VCF INFO tags from query VCF that should be kept in CPSR output TSV
--ignore_noncoding Ignore non-coding (i.e. non protein-altering) variants in report, default: False