Skip to content

Commit

Permalink
doc cleaning
Browse files Browse the repository at this point in the history
  • Loading branch information
sigven committed Mar 11, 2024
1 parent 6228d0c commit 5b0c960
Show file tree
Hide file tree
Showing 8 changed files with 154 additions and 133 deletions.
50 changes: 0 additions & 50 deletions R/classification.R
Original file line number Diff line number Diff line change
Expand Up @@ -1038,56 +1038,6 @@ combine_novel_and_preclassified <-
"^, ", ""
)

# snv_indel_report[["variant"]][[c]] <-
# snv_indel_report[["variant"]][[c]] |>
# dplyr::select(
# c("SAMPLE_ID",
# "GENOMIC_CHANGE",
# "VAR_ID",
# "GENOTYPE",
# "CPSR_CLASSIFICATION_SOURCE",
# "GENOME_VERSION",
# "VARIANT_CLASS",
# "CODING_STATUS",
# "SYMBOL",
# "GENENAME",
# "CCDS",
# "ENTREZGENE",
# "UNIPROT_ID",
# "ENSEMBL_GENE_ID",
# "ENSEMBL_TRANSCRIPT_ID",
# "REFSEQ_TRANSCRIPT_ID",
# "ONCOGENE",
# "TUMOR_SUPPRESSOR",
# "CANCERGENE_EVIDENCE",
# "CONSEQUENCE",
# "VEP_ALL_CSQ",
# "CDS_CHANGE",
# "PROTEIN_CHANGE",
# "HGVSp",
# "HGVSc",
# "EFFECT_PREDICTIONS",
# "LOSS_OF_FUNCTION",
# "NULL_VARIANT",
# "LAST_EXON",
# "EXON_POSITION",
# "INTRON_POSITION",
# "MUTATION_HOTSPOT",
# "REGULATORY_ANNOTATION",
# "PFAM_DOMAIN_NAME",
# "DBSNP",
# "RMSK_HIT",
# "CANCER_PHENOTYPE",
# "CLINVAR_CLASSIFICATION",
# "CLINVAR_MSID",
# "CLINVAR_VARIANT_ORIGIN",
# "CLINVAR_CONFLICTED",
# "CLINVAR_PHENOTYPE",
# "CLINVAR_REVIEW_STATUS_STARS"
# ),
# dplyr::everything()
# )

for (col in colnames(var_calls)) {
if (nrow(var_calls[!is.na(
var_calls[, col]
Expand Down
9 changes: 6 additions & 3 deletions R/input_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -111,13 +111,16 @@ load_germline_snv_indel <- function(
)

if("FINAL_CLASSIFICATION" %in%
colnames(callset$biomarker_evidence$items)){
colnames(callset$biomarker_evidence$items) &
"CPSR_CLASSIFICATION_SOURCE" %in%
colnames(callset$variant)){

callset$biomarker_evidence$items <-
callset$biomarker_evidence$items |>
dplyr::filter(
.data$FINAL_CLASSIFICATION == "Pathogenic" |
.data$FINAL_CLASSIFICATION == "Likely_Pathogenic")
.data$CPSR_CLASSIFICATION_SOURCE == "ClinVar" &
(.data$FINAL_CLASSIFICATION == "Pathogenic" |
.data$FINAL_CLASSIFICATION == "Likely_Pathogenic"))
}

}
Expand Down
132 changes: 71 additions & 61 deletions R/main.R
Original file line number Diff line number Diff line change
Expand Up @@ -276,70 +276,80 @@ write_cpsr_output <- function(report,
settings[["conf"]][["visual_reporting"]][["visual_theme"]]

if (output_format == "html") {
if(file.exists(quarto_input)){
if(report$content$snv_indel$v_stat_cpg$n < 2000){
if(file.exists(quarto_input)){

## make temporary directory for quarto report rendering
tmp_quarto_dir <- file.path(
output_dir,
paste0('quarto_', stringi::stri_rand_strings(1, 15))
)
quarto_main_template <-
glue::glue("{tmp_quarto_dir}{.Platform$file.sep}cpsr_report.qmd")
quarto_main_template_sample <-
glue::glue("{tmp_quarto_dir}{.Platform$file.sep}cpsr_report_sample.qmd")
quarto_html <-
glue::glue("{tmp_quarto_dir}{.Platform$file.sep}cpsr_report_sample.html")

## Copy all CPSR quarto reporting templates, bibliography, css etc to
## the temporary directory for quarto report rendering
invisible(cpsr::mkdir(tmp_quarto_dir))
system(glue::glue("cp -r {cpsr_rep_template_path}{.Platform$file.sep}* {tmp_quarto_dir}"))

## Save sample CPSR report object in temporary quarto rendering directory
rds_report_path <- file.path(
tmp_quarto_dir, "cps_report.rds")
report$ref_data <- NULL
saveRDS(report, file = rds_report_path)

## Substitute rds object in main quarto template with path to sample rds
readLines(quarto_main_template) |>
stringr::str_replace(
pattern = "<CPSR_REPORT_OBJECT.rds>",
replacement = rds_report_path) |>
stringr::str_replace(
pattern = "<SAMPLE_NAME>",
replacement = sample_name
) |>
writeLines(con = quarto_main_template_sample)

## Render report (quietly)
pcgrr::log4r_info("------")
pcgrr::log4r_info(
paste0(
"Generating quarto-based interactive HTML report (.html) with variant findings",
"- ('",output_format, "')"))

quarto::quarto_render(
input = quarto_main_template_sample,
execute_dir = tmp_quarto_dir,
quiet = !report$settings$conf$debug)

## Copy output HTML report from temporary rendering directory
## to designated HTML file in output directory
if(file.exists(quarto_html)){
system(
glue::glue(paste0(
"cp -f {quarto_html} ",
"{fnames[['html']]}")))
}else{
cat("WARNING\n")
}
## make temporary directory for quarto report rendering
tmp_quarto_dir <- file.path(
output_dir,
paste0('quarto_', stringi::stri_rand_strings(1, 15))
)
quarto_main_template <-
glue::glue("{tmp_quarto_dir}{.Platform$file.sep}cpsr_report.qmd")
quarto_main_template_sample <-
glue::glue("{tmp_quarto_dir}{.Platform$file.sep}cpsr_report_sample.qmd")
quarto_html <-
glue::glue("{tmp_quarto_dir}{.Platform$file.sep}cpsr_report_sample.html")

## Copy all CPSR quarto reporting templates, bibliography, css etc to
## the temporary directory for quarto report rendering
invisible(cpsr::mkdir(tmp_quarto_dir))
system(glue::glue("cp -r {cpsr_rep_template_path}{.Platform$file.sep}* {tmp_quarto_dir}"))

## Save sample CPSR report object in temporary quarto rendering directory
rds_report_path <- file.path(
tmp_quarto_dir, "cps_report.rds")
report$ref_data <- NULL
saveRDS(report, file = rds_report_path)

## Substitute rds object in main quarto template with path to sample rds
readLines(quarto_main_template) |>
stringr::str_replace(
pattern = "<CPSR_REPORT_OBJECT.rds>",
replacement = rds_report_path) |>
stringr::str_replace(
pattern = "<SAMPLE_NAME>",
replacement = sample_name
) |>
writeLines(con = quarto_main_template_sample)

## Render report (quietly)
pcgrr::log4r_info("------")
pcgrr::log4r_info(
paste0(
"Generating quarto-based interactive HTML report (.html) with variant findings",
"- ('",output_format, "')"))

quarto::quarto_render(
input = quarto_main_template_sample,
execute_dir = tmp_quarto_dir,
quiet = !report$settings$conf$debug)

## Copy output HTML report from temporary rendering directory
## to designated HTML file in output directory
if(file.exists(quarto_html)){
system(
glue::glue(paste0(
"cp -f {quarto_html} ",
"{fnames[['html']]}")))
}else{
cat("WARNING\n")
}

## remove temporary quarto directory (if debugging is switched off)
if(!(settings$conf$debug)){
system(glue::glue("rm -rf {tmp_quarto_dir}"))
## remove temporary quarto directory (if debugging is switched off)
if(!(settings$conf$debug)){
system(glue::glue("rm -rf {tmp_quarto_dir}"))
}
pcgrr::log4r_info("------")
}
pcgrr::log4r_info("------")
}else{
pcgrr::log4r_warn("------")
pcgrr::log4r_warn(
paste0("Too large variant set (n = ",
report$content$snv_indel$v_stat_cpg$n,
"for display in HTML report - ",
"skipping report generation"))
pcgrr::log4r_warn("------")
}
}

Expand Down
43 changes: 43 additions & 0 deletions data-raw/data-raw.R
Original file line number Diff line number Diff line change
Expand Up @@ -469,3 +469,46 @@ usethis::use_data(col_format_output, overwrite = T)
# wb = workbook,
# "pkgdown/assets/cpsr_superpanel_2024_03.xlsx",
# overwrite = TRUE)

#
# panel_zero_display <- panel_zero$grch38 |>
# dplyr::select(
# c("ENTREZGENE",
# "SYMBOL",
# "ENTREZGENE",
# "ENSEMBL_GENE_ID",
# "GENENAME",
# "CPG_PHENOTYPES",
# "CPG_MOI",
# "CPG_MOD",
# "CPG_SOURCE",
# )
# ) |>
# dplyr::mutate(
# CPG_SOURCE = stringr::str_replace_all(
# CPG_SOURCE, "&", ", "
# )) |>
# dplyr::mutate(
# CPG_SOURCE = stringr::str_replace_all(
# CPG_SOURCE, "ACMG_SF", ""
# )
# ) |>
# dplyr::mutate(
# GENE = paste0(
# "<a href='https://www.ncbi.nlm.nih.gov/gene/",
# .data$ENTREZGENE,
# "' target='_blank'>",
# .data$SYMBOL, "</a>"
# )
# ) |>
# dplyr::select(
# c("GENE","ENTREZGENE","ENSEMBL_GENE_ID",
# "CPG_MOD", "CPG_MOI", "GENENAME",
# "CPG_SOURCE", "CPG_PHENOTYPES")
# )
#
# readr::write_tsv(
# panel_zero_display, file = "inst/extdata/panel_zero.tsv.gz",
# na = "NA", col_names = T,quote = "none"
# )

Binary file added inst/extdata/panel_zero.tsv.gz
Binary file not shown.
20 changes: 17 additions & 3 deletions pkgdown/_pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,29 @@ toc:
depth: 3
template:
bootstrap: 5
bslib:
info: "#007a74"
dropdown-link-hover-bg: "#007a74"
dropdown-link-hover-color: "white"
dropdown-link-active-color: "white"
navbar-light-color: "white"
navbar-light-brand-color: "white"
navbar-light-brand-hover-color: "white"
navbar-link-color: "white"
authors:
Sigve Nakken:
href: https://github.com/sigven
Peter Diakumis:
href: https://github.com/pdiakumis
navbar:
link-color: "white"
light-color: "white"
light-brand-color: "white"
type: light
bg: info
structure:
left: [home, installation, running, articles, changelog]
right: [reference, search, github]
left: [installation, running, reference, articles]
right: [search, changelog, github]
components:
home:
text: Intro
Expand All @@ -30,7 +44,7 @@ navbar:
href: articles/input.html
- text: Virtual gene panels
href: articles/virtual_panels.html
- text: ACMG classification
- text: Variant classification
href: articles/variant_classification.html
- text: Output files
href: articles/output.html
Expand Down
20 changes: 9 additions & 11 deletions pkgdown/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,21 @@

<br>

The *Cancer Predisposition Sequencing Reporter (CPSR)* is a computational workflow that **interprets germline variants** identified from next-generation sequencing **in the context of cancer predisposition**. The workflow is integrated with the framework that underlies [Personal Cancer Genome Reporter - PCGR ](https://github.com/sigven/pcgr). While *PCGR* is intended for reporting and analysis of somatic variants detected in a tumor, *CPSR* is intended for reporting and ranking of germline variants in protein-coding genes that are implicated in cancer predisposition and inherited cancer syndromes.
The *Cancer Predisposition Sequencing Reporter (CPSR)* is a computational workflow that **interprets germline variants** identified from next-generation sequencing **in the context of cancer predisposition**.

*CPSR* accepts a query file with raw germline variant calls(SNVs/InDels) from a single sample, encoded in the [VCF format ](https://samtools.github.io/hts-specs/VCFv4.2.pdf). Through the use several different [virtual cancer predisposition gene panels](articles/virtual_panels.html), the user can flexibly put a restriction on which genes and findings are displayed in the cancer predisposition report. *CPSR* determines the clinical significance of variants through the implementation of refined ACMG-AMP variant classification criteria.
*CPSR* accepts a query file with raw germline variant calls (SNVs/InDels) from a single sample (cancer patient), encoded in the [VCF format](https://samtools.github.io/hts-specs/VCFv4.2.pdf). *CPSR* conducts comprehensive gene and variant annotation on the input calls and generates a dedicated _variant HTML report_ that provides the following main functionality:

Snapshots of sections in the cancer predisposition genome report:
1) Flexible **selection of cancer predisposition genes** subject to analysis
2) **Variant classification** (*Pathogenic* to _Benign_) according to published guidelines (ACMG/AMP)
3) **Biomarker matching** of sample variants (prognosis, diagnosis, drug sensitivity/resistance)
4) Potential **secondary/incidental findings** (ACMG recommendations)

![](img/cpsr_views.png)

The software performs extensive variant annotation on the selected geneset and produces an interactive HTML report, in which the user can investigate:
The workflow is integrated with the framework that underlies the [Personal Cancer Genome Reporter - PCGR](https://github.com/sigven/pcgr). While *PCGR* is intended for reporting and analysis of somatic variants detected in a tumor, *CPSR* is intended for reporting and ranking of germline variants in protein-coding genes that are implicated in cancer predisposition and inherited cancer syndromes.

* __ClinVar variants__ - pre-classified variants according to a five-level tier scheme in ClinVar (Pathogenic to Benign)
* __Novel variants__ - classified by CPSR through refined ACMG criteria (variant frequency levels and functional effects) into a five-level classification scheme (Pathogenic to Benign)
* __Variant biomarkers__ - cancer predisposition variants with reported implications for prognosis, diagnosis or therapeutic regimens
* __Secondary findings (optional)__ - pathogenic variants in the [ACMG recommended list for reporting of secondary findings](https://www.ncbi.nlm.nih.gov/clinvar/docs/acmg/)
* __GWAS hits (optional)__ - variants overlapping with previously identified hits in genome-wide association studies (GWAS) of cancer phenotypes (i.e. low to moderate risk conferring alleles).
Snapshots of sections in the cancer predisposition genome report:

The variant sets can be interactively explored and filtered further through different types of filters (phenotypes, genes, variant consequences, population MAF etc.). Importantly, the unclassified (i.e. non-ClinVar) variants are assigned a *pathogenicity score* based on the aggregation of scores according to previously established [ACMG criteria](https://www.ncbi.nlm.nih.gov/pubmed/25741868). The ACMG criteria includes cancer-specific criteria, as outlined and specified in several previous studies ([Huang et al., *Cell*, 2018](https://www.ncbi.nlm.nih.gov/pubmed/29625052); [Nykamp et al., *Genet Med.*, 2017](https://www.ncbi.nlm.nih.gov/pubmed/28492532); [Maxwell et al., *Am J Hum Genet.*, 2016](https://www.ncbi.nlm.nih.gov/pubmed/27153395); [Amendola et al., *Am J Hum Genet.*, 2016](https://www.ncbi.nlm.nih.gov/pubmed/27181684)). See also [*Related work*](https://github.com/sigven/cpsr#related-work) below).
![](img/cpsr_views.png)


### News
Expand Down
13 changes: 8 additions & 5 deletions vignettes/virtual_panels.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@ The cancer predisposition report can show variants found in a number of well-kno

* **Panel 0** is a non-conservative, research-based _superpanel_ assembled through multiple sources on cancer predisposition genes:
* A list of 152 genes that were curated and established within TCGA’s pan-cancer study ([Huang et al., *Cell*, 2018](https://www.ncbi.nlm.nih.gov/pubmed/29625052))
* A list of 114 protein-coding genes that has been manually curated in COSMIC’s [Cancer Gene Census v91](https://cancer.sanger.ac.uk/census),
* Genes from all [Genomics England PanelApp](https://panelapp.genomicsengland.co.uk/) panels for inherited cancers and tumor syndromes (detailed below)
* Additional genes deemed relevant for cancer predisposition (contributed by the CPSR user community)
* A list of 114 protein-coding genes that have been manually curated in COSMIC’s [Cancer Gene Census v99](https://cancer.sanger.ac.uk/census)
* Genes from all [Genomics England PanelApp](https://panelapp.genomicsengland.co.uk/) panels for inherited cancers and tumor syndromes, as well as DNA repair genes (detailed below)
* Additional genes deemed relevant for cancer predisposition (i.e. contributed by CPSR users)


The combination of the above sources resulted in a [non-redundant set of **n = 563**
genes](https://cpsr.readthedocs.io/en/latest/superpanel.html) of relevance for cancer predisposition (see complete details [below](#panel-0))
The combination of the above sources resulted in a non-redundant set of **n = 563**
genes of relevance for cancer predisposition (see complete details [below](#panel-0)).

Data with respect to mechanisms of inheritance (<i>MoI</i> - autosomal recessive (AR) vs. autosomal
dominant (AD) etc.) and whether mechanisms of disease are associated with loss-of-function (<i>LoF</i>) or
Expand Down Expand Up @@ -76,6 +76,7 @@ The cancer predisposition report can show variants found in a number of well-kno
[Download the complete set of CPSR superpanel genes, grch37/grch38 versions (xlsx)](https://sigven.github.io/cpsr/cpsr_superpanel_2024_03.xlsx)


<!--
| no | gene_link | entrezgene | ensembl_gene_id | moi | mod | gene_name | source | phenotype_syndrome_term |
|----:|:-----------------------------------------------------------------------------------|-----------:|:----------------|:----------|:----|:--------------------------------------------------------------------------------------------------|:-------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------|
| 1 | <a href='https://www.ncbi.nlm.nih.gov/gene/8647' target='_blank'>ABCB11</a> | 8647 | ENSG00000073734 | AR | LoF | ATP binding cassette subfamily B member 11 | TCGA_PANCAN_2018 | Crigler-Najjar syndrome, type II |
Expand Down Expand Up @@ -511,3 +512,5 @@ The cancer predisposition report can show variants found in a number of well-kno
| 431 | <a href='https://www.ncbi.nlm.nih.gov/gene/7516' target='_blank'>XRCC2</a> | 7516 | ENSG00000196584 | AR | LoF | X-ray repair cross complementing 2 | NCGC,PANEL_APP | NA |
| 432 | <a href='https://www.ncbi.nlm.nih.gov/gene/7517' target='_blank'>XRCC3</a> | 7517 | ENSG00000126215 | AD | NA | X-ray repair cross complementing 3 | OTHER | NA |
| 433 | <a href='https://www.ncbi.nlm.nih.gov/gene/7704' target='_blank'>ZBTB16</a> | 7704 | ENSG00000109906 | NA | NA | zinc finger and BTB domain containing 16 | OTHER | NA |
-->

0 comments on commit 5b0c960

Please sign in to comment.