From cca8253f8ea09b2577c717c657614bb1614a2fbe Mon Sep 17 00:00:00 2001 From: Sigve Nakken Date: Wed, 20 Dec 2023 21:11:08 +0100 Subject: [PATCH] custom panel and sf debug --- R/classification.R | 19 ++- R/main.R | 120 +++++++++++-------- inst/templates/quarto/cpsr_documentation.qmd | 17 ++- inst/templates/quarto/cpsr_virtual_panel.qmd | 7 +- 4 files changed, 97 insertions(+), 66 deletions(-) diff --git a/R/classification.R b/R/classification.R index 369ee75..24f2111 100644 --- a/R/classification.R +++ b/R/classification.R @@ -1045,7 +1045,6 @@ assign_variant_tiers <- snv_indel_report[["variant_set"]][[c]] ) - if (nrow(snv_indel_report[["variant_set"]][[c]]) == 0) { pcgrr::log4r_info(paste0("Zero variants found - ", c)) next @@ -1240,15 +1239,15 @@ assign_variant_tiers <- } } - snv_indel_report[["variant_set"]][["tsv"]] <- - dplyr::bind_rows( - snv_indel_report[["variant_set"]][["class5"]], - snv_indel_report[["variant_set"]][["class4"]], - snv_indel_report[["variant_set"]][["class3"]], - snv_indel_report[["variant_set"]][["class2"]], - snv_indel_report[["variant_set"]][["class1"]] - ) - + for(c in c("class5","class4","class3","class2","class1")){ + if(NROW(snv_indel_report[["variant_set"]][[c]]) > 0){ + snv_indel_report[["variant_set"]][["tsv"]] <- + snv_indel_report[["variant_set"]][["tsv"]] |> + dplyr::bind_rows( + snv_indel_report[["variant_set"]][[c]] + ) + } + } return(snv_indel_report) } diff --git a/R/main.R b/R/main.R index 22ae894..b52d28f 100644 --- a/R/main.R +++ b/R/main.R @@ -168,6 +168,12 @@ generate_cpsr_report <- function(yaml_fname = NULL) { return(cps_report) } + cpg_call_stats <- + pcgrr::variant_stats_report( + cpg_calls, + name = "v_stat_cpg" + ) + ## Assign calls to tiers (ClinVar calls + CPSR classification ## for novel, non-ClinVar variants) snv_indel_report <- @@ -238,44 +244,51 @@ generate_cpsr_report <- function(yaml_fname = NULL) { ## secondary findings if (cps_report$settings$conf$variant_classification$secondary_findings == TRUE) { - if(identical("undefined",unique(variant_calls$GENOTYPE)) == T){ + if(as.logical( + cps_report$settings$conf$sample_properties$genotypes_available) == F){ pcgrr::log4r_warn(paste0( "Assessement of secondary variant findings (ACMG SF v3.2) ", "NOT possible - variant genotype information unavailable" )) - } - secondary_calls <- - cpsr::retrieve_secondary_calls( - variant_calls, - umls_map = cps_report$ref_data$phenotype$umls) - - if(NROW(secondary_calls) > 0){ - secondary_calls <- secondary_calls |> - dplyr::anti_join( - cpg_calls, by = "VAR_ID" + }else{ + secondary_calls <- + cpsr::retrieve_secondary_calls( + variant_calls, + umls_map = cps_report$ref_data$phenotype$umls) + + ## do not report a secondary variant finding if this is + ## already reported among the main hits (e.g. BRCA) + if(NROW(secondary_calls) > 0){ + secondary_calls <- secondary_calls |> + dplyr::anti_join( + cpg_calls, by = "VAR_ID" + ) + } + secondary_call_stats <- + pcgrr::variant_stats_report( + secondary_calls, + name = "v_stat_secondary" ) + cps_report[['content']][['snv_indel']][['v_stat_secondary']] <- + secondary_call_stats$v_stat_secondary + + pcgrr::log4r_info(paste0( + "Assessement of secondary variant findings (ACMG SF v3.2)" + )) + if (NROW(secondary_calls) > 0) { + cps_report[["content"]][["snv_indel"]][["disp"]][["secondary"]] <- + secondary_calls |> + dplyr::arrange( + .data$LOSS_OF_FUNCTION, .data$CODING_STATUS) |> + dplyr::select( + dplyr::one_of(col_format_output[['html_sf']])) + } + pcgrr::log4r_info(paste0( + "Number of pathogenic ClinVar variants in the ACMG secondary findings list - other ", + "genes of clinical significance: ", + cps_report[["content"]][["snv_indel"]][["v_stat_secondary"]][["n_coding"]] + )) } - secondary_call_stats <- - pcgrr::variant_stats_report( - secondary_calls, - name = "v_stat_secondary" - ) - pcgrr::log4r_info(paste0( - "Assessement of secondary variant findings (ACMG SF v3.2)" - )) - if (NROW(secondary_calls) > 0) { - cps_report[["content"]][["snv_indel"]][["disp"]][["secondary"]] <- - secondary_calls |> - dplyr::arrange( - .data$LOSS_OF_FUNCTION, .data$CODING_STATUS) |> - dplyr::select( - dplyr::one_of(col_format_output[['html_sf']])) - } - pcgrr::log4r_info(paste0( - "Number of pathogenic variants in the ACMG secondary findings list - other ", - "genes of clinical significance: ", - cps_report[["content"]][["snv_indel"]][["v_stat_secondary"]][["n_coding"]] - )) } cps_report[["content"]][["snv_indel"]][["eval"]] <- TRUE @@ -399,7 +412,10 @@ write_cpsr_output <- function(report, ## Render report (quietly) pcgrr::log4r_info("------") pcgrr::log4r_info( - "Writing HTML file (.html) with report contents - quarto") + paste0( + "Generating quarto-based interactive HTML report (.html) with variant findings", + "- ('",output_format, "')")) + quarto::quarto_render( input = quarto_main_template_sample, execute_dir = tmp_quarto_dir, @@ -420,7 +436,6 @@ write_cpsr_output <- function(report, if(!(settings$conf$debug)){ system(glue::glue("rm -rf {tmp_quarto_dir}")) } - pcgrr::log4r_info("------") } } @@ -430,8 +445,8 @@ write_cpsr_output <- function(report, report[["content"]][["snv_indel"]][["variant_set"]][[output_format]]) > 0) { pcgrr::log4r_info("------") pcgrr::log4r_info( - paste0("Writing SNV/InDel tab-separated values (TSV) file ", - "with CPSR variant classifications - ('", + paste0("Generating SNV/InDel tab-separated values file (.tsv) ", + "with variant findings - ('", output_format, "')")) readr::write_tsv( @@ -440,16 +455,14 @@ write_cpsr_output <- function(report, col_names = T, quote = "none", na = ".") - - pcgrr::log4r_info("------") } } if (output_format == "xlsx") { pcgrr::log4r_info("------") pcgrr::log4r_info( - paste0("Writing Excel output file with ", - "CPSR report contents - ('", + paste0("Generating Excel workbook (.xlsx) with ", + "variant findings - ('", output_format, "')")) workbook <- openxlsx2::wb_workbook() |> openxlsx2::wb_add_worksheet(sheet = "VIRTUAL_PANEL") |> @@ -475,7 +488,18 @@ write_cpsr_output <- function(report, col_names = TRUE, na.strings = "", table_style = "TableStyleMedium16") |> - openxlsx2::wb_add_data_table( + openxlsx2::wb_set_col_widths( + sheet = "CLASSIFICATION", + cols = 1:length(cpsr::col_format_output[['xlsx_classification']]), + widths = "auto") |> + openxlsx2::wb_set_col_widths( + sheet = "VIRTUAL_PANEL", + cols = 1:ncol(report$settings$conf$gene_panel$panel_genes), + widths = "auto") + + if(NROW(report$content$snv_indel$clin_eitem$all$any) > 0){ + workbook <- workbook |> + openxlsx2::wb_add_data_table( sheet = "BIOMARKER_EVIDENCE", x = dplyr::select( report$content$snv_indel$clin_eitem$all$any, @@ -485,22 +509,16 @@ write_cpsr_output <- function(report, col_names = TRUE, na.strings = "", table_style = "TableStyleMedium17") |> - openxlsx2::wb_set_col_widths( - sheet = "CLASSIFICATION", - cols = 1:length(cpsr::col_format_output[['xlsx_classification']]), - widths = "auto") |> openxlsx2::wb_set_col_widths( sheet = "BIOMARKER_EVIDENCE", cols = 1:length(cpsr::col_format_output[['xlsx_biomarker']]), - widths = "auto") |> - openxlsx2::wb_set_col_widths( - sheet = "VIRTUAL_PANEL", - cols = 1:ncol(report$settings$conf$gene_panel$panel_genes), - widths = "auto") |> + widths = "auto") + } + + workbook <- workbook |> openxlsx2::wb_save( fnames[['xlsx']], overwrite = TRUE) - pcgrr::log4r_info("------") } diff --git a/inst/templates/quarto/cpsr_documentation.qmd b/inst/templates/quarto/cpsr_documentation.qmd index 9242837..24afde1 100644 --- a/inst/templates/quarto/cpsr_documentation.qmd +++ b/inst/templates/quarto/cpsr_documentation.qmd @@ -13,7 +13,7 @@ This report is intended for interpretation of inherited DNA variants implicated ### Gene/variant annotation resources -The variant interpretation procedures performed in this cancer genome report are relying upon multiple tools and knowledge resources, which are outlined below. Note that some resources demand specific licensing agreements if you plan to utilize their data (and thus this report) in a non-academic setting. +The variant interpretation procedures performed in this cancer genome report are relying upon multiple tools and knowledge resources, as outlined below. Note that some resources (highlighted below) demand specific licensing agreements if you plan to utilize their data (and thus this report) in a commercial, non-academic setting. * __PCGR databundle version__ @@ -32,8 +32,14 @@ for(i in 1:NROW(ref_datasets)){ description <- ref_datasets[i,"source_description"] url <- ref_datasets[i,"source_url"] version <- ref_datasets[i,"source_version"] + if(version == "."){ + version = NA + } license <- ref_datasets[i, "source_license"] license_url <- ref_datasets[i, "source_license_url"] + if(license_url == "."){ + license_url <- NA + } wflow <- ref_datasets[i, "wflow"] if(!(stringr::str_detect( wflow,"cpsr"))){ @@ -51,8 +57,13 @@ for(i in 1:NROW(ref_datasets)){ } }else{ if(!is.na(license_url)){ - s <- paste0(" * [", source_full, "](", url, ") - ", description, " (", version, ")", - " - [", license,"](", license_url, ")") + if(source == "cgc" | source == "gepa"){ + s <- paste0(" * [", source_full, "](", url, ") - ", description, " (", version, ")", + " - [", license,"](", license_url, ")") + }else{ + s <- paste0(" * [", source_full, "](", url, ") - ", description, " (", version, ")", + " - [", license,"](", license_url, ")") + } }else{ s <- paste0(" * [", source_full, "](", url, ") - ", description, " (", version, ")", " - ", license) diff --git a/inst/templates/quarto/cpsr_virtual_panel.qmd b/inst/templates/quarto/cpsr_virtual_panel.qmd index 1197ff0..9606af2 100644 --- a/inst/templates/quarto/cpsr_virtual_panel.qmd +++ b/inst/templates/quarto/cpsr_virtual_panel.qmd @@ -2,10 +2,13 @@ ### Virtual gene panel -Cancer predisposition geneset subject to analysis/screening in this report: +Variants reported for the sample are limited to the tile of cancer predisposition genes +shown below. IMPORTANT NOTE: CPSR does not perform any interrogation of _which genes +that were subject to sequencing_, it merely checks potential overlap of input variants +within the user-defined __virtual__ panel. * __`r cps_report[['settings']][['conf']][['gene_panel']][['description']]`__ - * Diagnostic-grade genes (applicable to Genomics England panels only): __`r as.logical(cps_report[['settings']][['conf']][['gene_panel']][['diagnostic_grade_only']])`__ + * Diagnostic-grade genes only (applicable to Genomics England panels): __`r as.logical(cps_report[['settings']][['conf']][['gene_panel']][['diagnostic_grade_only']])`__ ```{r gene_selection, echo = F, eval = T} tiles_html <- cpsr::plot_virtual_panels(