Boehringer-Ingelheim · grst · Mar 15, 2024 · Mar 7, 2024 · Mar 7, 2024 · Mar 7, 2024
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,23 +1,25 @@
 Package: PersonalisIO
 Title: Read Personalis data into MultiAssayExperiment objects
-Version: 0.2.0.9000
+Version: 0.3.0.9000
 Authors@R: 
     person("Gregor", "Sturm", , "[email protected]", role = c("aut", "cre"))
 Description: This package provides convenience functions for reading real-world evidence data provided by Personalis into Bioconductor MultiAssayExperiment objects.
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.3.1
-Depends:
+Imports:
+    dplyr,
     SummarizedExperiment,
     readxl,
     MultiAssayExperiment,
     tibble,
     pbapply,
     tidyr,
     purrr,
-    dplyr,
     BumpyMatrix,
-    rvest
+    rvest,
+    stringr, 
+    vcfR
 Suggests: 
     knitr,
     rmarkdown,

diff --git a/NAMESPACE b/NAMESPACE
@@ -7,15 +7,21 @@ export(read_personalis_hla_reports)
 export(read_personalis_msi_reports)
 export(read_personalis_small_variant_reports)
 export(read_personalis_tcr_reports)
+export(read_personalis_vcf_files)
+importFrom(BumpyMatrix,splitAsBumpyMatrix)
 importFrom(MultiAssayExperiment,MultiAssayExperiment)
 importFrom(SummarizedExperiment,SummarizedExperiment)
 importFrom(dplyr,across)
+importFrom(dplyr,all_of)
 importFrom(dplyr,any_of)
 importFrom(dplyr,bind_rows)
+importFrom(dplyr,contains)
 importFrom(dplyr,cur_column)
 importFrom(dplyr,distinct)
 importFrom(dplyr,if_else)
+importFrom(dplyr,left_join)
 importFrom(dplyr,mutate)
+importFrom(dplyr,rename_with)
 importFrom(dplyr,select)
 importFrom(purrr,keep)
 importFrom(purrr,map)
@@ -25,6 +31,11 @@ importFrom(rvest,html_nodes)
 importFrom(rvest,html_table)
 importFrom(rvest,html_text)
 importFrom(rvest,read_html)
+importFrom(stringr,str_split_i)
+importFrom(stringr,str_to_title)
+importFrom(tibble,as_tibble)
 importFrom(tibble,tibble)
 importFrom(tidyr,pivot_longer)
 importFrom(tidyr,pivot_wider)
+importFrom(vcfR,read.vcfR)
+importFrom(vcfR,vcfR2tidy)
diff --git a/R/personalis.R b/R/personalis.R
diff --git a/R/util.R b/R/util.R
@@ -98,6 +98,7 @@ read_samples <- function(sample_paths, io_func, description, ...) {
 #' @param col_data {data.frame} data frame that is used as colData (must have rownames that are sample identifiers!)
 #' @param sample_col {character} column in `df` that contains the sample identifier
 #' @return {tibble} new data frame with dummy entries added
+#' @importFrom tibble as_tibble
 #' @keywords internal
 add_dummy_entry <- function(df, col_data, sample_col = "sample") {
   missing_samples <- setdiff(rownames(col_data), unique(df[[sample_col]]))
@@ -114,3 +115,32 @@ add_dummy_entry <- function(df, col_data, sample_col = "sample") {
     dummy_entries
   )
 }
+
+#' Parse a single VCF file into one tibble of variants.
+#'
+#' The VCF is read with `vcfR` and tidied once; the fixed fields are then left-joined with
+#' the per-sample genotype rows on `ChromKey`, `POS` and the second called allele.
+#'
+#' @param path path to VCF file in `*.vcf` or `*.vcf.gz` format
+#' @return {tibble} new data frame with all variants (fixed field and genotype information)
+#' @importFrom dplyr mutate left_join
+#' @importFrom vcfR read.vcfR vcfR2tidy
+#' @importFrom stringr str_split_i
+#' @importFrom tibble as_tibble
+parse_vcf_to_df <- function(path) {
+  # tidy the parsed VCF once and reuse the result for both tables instead of converting twice
+  tidy_vcf <- vcfR2tidy(read.vcfR(path))
+
+  # Add the second allele of each genotype call as `ALT` so it can serve as a join key.
+  # Genotypes may be unphased ("A/T") or phased ("A|T"), so split on either separator;
+  # splitting on "/" alone leaves `ALT` as NA for phased calls and the join then misses them.
+  gt_df <- tidy_vcf$gt |>
+    dplyr::mutate(ALT = str_split_i(gt_GT_alleles, "[/|]", 2))
+
+  # join fixed fields with genotype rows on ChromKey, POS and ALT; fixed-field rows without
+  # a matching genotype row are kept (left join) with NA in the genotype columns
+  joined_vcf_df <- tidy_vcf$fix |>
+    dplyr::left_join(gt_df, by = c("ChromKey", "POS", "ALT"))
+
+  as_tibble(joined_vcf_df)
+}
diff --git a/man/parse_vcf_to_df.Rd b/man/parse_vcf_to_df.Rd
diff --git a/man/read_personalis.Rd b/man/read_personalis.Rd
diff --git a/man/read_personalis_small_variant_report_sample.Rd b/man/read_personalis_small_variant_report_sample.Rd
diff --git a/man/read_personalis_small_variant_reports.Rd b/man/read_personalis_small_variant_reports.Rd
diff --git a/man/read_personalis_variant_calling_summary_statistics.Rd b/man/read_personalis_variant_calling_summary_statistics.Rd
diff --git a/man/read_personalis_vcf_files.Rd b/man/read_personalis_vcf_files.Rd
diff --git a/man/read_personalis_vcf_files_sample.Rd b/man/read_personalis_vcf_files_sample.Rd