Skip to content

Commit

Permalink
plotting + vignette updates (#28)
Browse files Browse the repository at this point in the history
* plotting + vignette updates

* Increment version number

* choosing ncells for clustering
  • Loading branch information
marcjwilliams1 authored Aug 31, 2021
1 parent 3455062 commit aa97c6e
Show file tree
Hide file tree
Showing 10 changed files with 103 additions and 103 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: schnapps
Title: Single Cell Haplotype copy Number Analysis through Phased Probabilistic States
Version: 0.5.1
Version: 0.5.2
Author@R: c(person("Marc", "Williams", email = "[email protected]",
role = c("aut", "cre")),
person("Tyler", "Funnell",
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# schnapps 0.5.2

* update to vignette and plotting

# schnapps 0.5.1

* updated default parameters
Expand Down
55 changes: 33 additions & 22 deletions R/callHSCN.R
Original file line number Diff line number Diff line change
Expand Up @@ -236,36 +236,48 @@ callalleleHMMcell <- function(CNBAF,
return(as.data.frame(alleleCN))
}

min_cells <- function(haplotypes, minfrachaplotypes = 0.95, mincells = 5) {
min_cells <- function(haplotypes, minfrachaplotypes = 0.95, mincells = 5, samplen = 10) {
chr <- hap_label <- NULL
nhaps_vec <- c()
prop_vec <- c()
prop <- c(0.005, 0.01, 0.02, 0.03, 0.04, seq(0.05, 1.0, 0.05))
prop <- c(0.005, 0.01, 0.02, 0.03, 0.04, 0.05, seq(0.1, 1.0, 0.1))
mycells <- unique(haplotypes$cell_id)
prop <- c(5 / length(mycells), 6 / length(mycells), 7 / length(mycells),
prop <- c(1 / length(mycells), 5 / length(mycells), 6 / length(mycells), 7 / length(mycells),
8 / length(mycells), 9 / length(mycells), 10 / length(mycells),
20 / length(mycells), 50 / length(mycells), prop)
prop <- sort(prop)
prop <- unique(sort(prop))
prop <- prop[prop < 1.0]
ncells <- length(mycells)
haplotype_counts <- as.data.table(haplotypes) %>%
.[, list(n = .N, nfrac = .N / ncells),
by = c("chr", "start", "end", "hap_label")
]
nhaps <- dim(dplyr::distinct(haplotype_counts, chr, start, hap_label))[1]

haplotype <- as.data.table(haplotypes)

for (i in prop) {
samp_cells <- sample(mycells, round(i * length(mycells)))
haplotype_counts_temp <-
as.data.table(dplyr::filter(haplotypes, cell_id %in% samp_cells)) %>%
.[, list(n = .N, nfrac = .N / ncells),
by = c("chr", "start", "end", "hap_label")
]
nhaps_temp <- dim(dplyr::distinct(haplotype_counts_temp, chr, start, hap_label))[1]
nhaps_vec <- c(nhaps_vec, nhaps_temp)
prop_vec <- c(prop_vec, i)
for (j in 1:samplen){
if (round(i * length(mycells)) == 0){
next
}
#sample cells
samp_cells <- sample(mycells, round(i * length(mycells)))
#filter haplotypes down to sampled cells
haplotypes_temp <- haplotypes[cell_id %in% samp_cells]
#count the number of haplotype blocks that have been retained
nhaps_temp <- dim(dplyr::distinct(haplotypes_temp, chr, start, hap_label))[1]
nhaps_vec <- c(nhaps_vec, nhaps_temp)
prop_vec <- c(prop_vec, i)
}
}

df <- data.frame(prop = prop_vec, nhaps = nhaps_vec / nhaps) %>%
dplyr::mutate(ncells = round(prop * length(mycells)))
dplyr::mutate(ncells = round(prop * length(mycells))) %>%
dplyr::group_by(prop, ncells) %>%
dplyr::summarise(nhaps = mean(nhaps), min_nhaps = min(nhaps)) %>%
dplyr::ungroup(.) %>%
as.data.frame(.)

ncells_forclustering <- df %>%
dplyr::filter(nhaps >= minfrachaplotypes) %>%
Expand Down Expand Up @@ -364,8 +376,7 @@ get_cells_per_chr_global <- function(ascn,
get_cells_per_chr_local <- function(ascn,
haplotypes,
ncells_for_clustering,
field = "BAF",
clustering_method = "copy",
field = "state_BAF",
phasebyarm = FALSE) {

# cluster cells per chromosome
Expand All @@ -378,23 +389,25 @@ get_cells_per_chr_local <- function(ascn,
n_neighbors = 20,
min_dist = 0.001,
minPts = ncells_for_clustering,
field = "state_BAF",
field = field,
umapmetric = "euclidean"
)
prop <- ascn_chr[as.data.table(cl$clustering), on = "cell_id"] %>%
.[, list(
propA = round(sum(balance) / .N, 2),
n = sum(balance),
propModestate = sum(state == Mode(state)) / .N,
propLOH = sum(LOH == "LOH") / .N,
ncells = length(unique(cell_id))
), by = .(chr, cell_id, clone_id)] %>%
.[, list(
propA = median(propA),
n = median(n),
propModestate = median(propModestate),
propLOH = median(propLOH),
ncells = median(ncells)
), by = .(chr, clone_id)]
prop <- prop[order(propA, propModestate, n, decreasing = TRUE)]
prop <- prop[order(propA, propModestate, propLOH, ncells, n, decreasing = TRUE)]
prop <- prop[prop[, .I[which.max(propA)], by = chr]$V1]
cells <- dplyr::filter(cl$clustering, clone_id == prop$clone_id[1]) %>%
dplyr::pull(cell_id)
Expand All @@ -409,8 +422,8 @@ proportion_imbalance <- function(ascn,
haplotypes,
field = "copy",
phasebyarm = FALSE,
minfrachaplotypes = 0.95,
clustering_method = "copy",
minfrachaplotypes = 0.95,
overwritemincells = NULL,
cluster_per_chr = TRUE) {
ncells <- length(unique(ascn$cell_id))
Expand All @@ -430,8 +443,7 @@ proportion_imbalance <- function(ascn,
chrlist <- get_cells_per_chr_local(ascn,
haplotypes,
ncells_for_clustering,
field = field,
clustering_method = clustering_method
field = field
)
} else {
chrlist <- get_cells_per_chr_global(ascn,
Expand All @@ -442,7 +454,6 @@ proportion_imbalance <- function(ascn,
)
}


return(list(chrlist = chrlist, propdf = propdf))
}

Expand Down
2 changes: 1 addition & 1 deletion R/combinedata_rna.R
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ format_haplotypes_rna <- function(haplotypes,
dplyr::select(cell_id, chr, start, end, hap_label, alleleA, alleleB, totalcounts, BAF, sample, patient)

if (create_cell_id) {
haplotypes <- dplyr::mutate(cell_id = paste(sample, patient, cell_id, sep = "-")) %>%
haplotypes <- dplyr::mutate(cell_id = paste(patient, sample, cell_id, sep = "-")) %>%
as.data.frame() %>%
orderdf()
}
Expand Down
6 changes: 6 additions & 0 deletions R/heatmap_plot.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@

cn_colours <- structure(
c(
"#3182BD", "#9ECAE1", "#CCCCCC", "#FDCC8A", "#FC8D59", "#E34A33",
Expand Down Expand Up @@ -1017,6 +1018,11 @@ plotHeatmap <- function(cn,
}

if (!is.null(clusters)) {
cells_clusters <- length(unique(clusters$cell_id))
cells_data <- length(unique(CNbins$cell_id))
if (cells_data != cells_clusters){
warning("Number of cells in clusters dataframe != number of cells in the bins data!")
}
if (!"clone_id" %in% names(clusters)) {
stop("No clone_id columns in clusters dataframe, you might need to rename your clusters")
}
Expand Down
6 changes: 4 additions & 2 deletions R/plotting.R
Original file line number Diff line number Diff line change
Expand Up @@ -1170,11 +1170,12 @@ plotCNBAF <- function(cn, nfilt = 10^5, plottitle = "5Mb", pointsize = 0.1, ...)

#' Plot BAF distributions per allele specific state
#'
#' @param cn Either a hscn object or a single cell allele specific copy number dataframe with the following columns: `cell_id`, `chr`, `start`, `end`, `state`, `copy`
#' @param cn Either a hscn object or a single cell allele specific copy number dataframe with the following columns: `cell_id`, `chr`, `start`, `end`, `state`, `copy`, `BAF`
#' @param minpts minimum number of points to include default = 250
#' @param minfrac states that are present below this fraction will be removed, default = 0.01
#' @param maxstate States with total copy number > maxstate will be removed, default = 10
#' @param dens_adjust density adjustment factor in the violin plots
#' @param mincounts filter out bins < mincounts from plotting, default = 6
#'
#' \dontrun{
#' data("haplotypes")
Expand All @@ -1185,7 +1186,7 @@ plotCNBAF <- function(cn, nfilt = 10^5, plottitle = "5Mb", pointsize = 0.1, ...)
#' }
#'
#' @export
plotBAFperstate <- function(cn, minpts = 250, minfrac = 0.01, maxstate = 10, dens_adjust = 2.0) {
plotBAFperstate <- function(cn, minpts = 250, minfrac = 0.01, maxstate = 10, dens_adjust = 2.0, mincounts = 6) {
if (is.hscn(cn) | is.ascn(cn)) {
alleleCN <- cn$data
} else {
Expand All @@ -1205,6 +1206,7 @@ plotBAFperstate <- function(cn, minpts = 250, minfrac = 0.01, maxstate = 10, den
allASstates$cBAF[is.nan(allASstates$cBAF)] <- 0.0

forplot <- alleleCN %>%
dplyr::filter(totalcounts > mincounts) %>%
dplyr::group_by(state_AS_phased) %>%
dplyr::mutate(n = dplyr::n()) %>%
dplyr::ungroup() %>%
Expand Down
8 changes: 4 additions & 4 deletions R/plotting_rna.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ per_chr_baf <- function(haps, filtern = 9, perarm = FALSE, labelclones = FALSE)
ggplot2::geom_histogram(bins = 30, alpha = 0.5) +
ggplot2::facet_wrap(~chrarmf, scales = "free_y") +
cowplot::theme_cowplot() +
ggplot2::scale_x_continuous(breaks = c(0.0, 0.5, 1.0), limits = c(0, 1.0)) +
ggplot2::scale_x_continuous(breaks = c(0.0, 0.5, 1.0), limits = c(-0.05, 1.05)) +
cowplot::panel_border() +
ggplot2::geom_vline(xintercept = 0.5, lty = 2, size = 0.5, col = "firebrick4") +
ggplot2::theme(
Expand All @@ -31,7 +31,7 @@ per_chr_baf <- function(haps, filtern = 9, perarm = FALSE, labelclones = FALSE)
ggplot2::geom_histogram(bins = 30, alpha = 0.5) +
ggplot2::facet_wrap(~chrf, scales = "free_y") +
cowplot::theme_cowplot() +
ggplot2::scale_x_continuous(breaks = c(0.0, 0.5, 1.0), limits = c(0, 1.0)) +
ggplot2::scale_x_continuous(breaks = c(0.0, 0.5, 1.0), limits = c(-0.05, 1.05)) +
cowplot::panel_border() +
ggplot2::geom_vline(xintercept = 0.5, lty = 2, size = 0.5, col = "firebrick4") +
ggplot2::theme(
Expand Down Expand Up @@ -66,7 +66,7 @@ per_chr_baf <- function(haps, filtern = 9, perarm = FALSE, labelclones = FALSE)
ggplot2::geom_histogram(bins = 30, alpha = 0.5) +
ggplot2::facet_wrap(~chrarmf, scales = "free_y") +
cowplot::theme_cowplot() +
ggplot2::scale_x_continuous(breaks = c(0.0, 0.5, 1.0), limits = c(0, 1.0)) +
ggplot2::scale_x_continuous(breaks = c(0.0, 0.5, 1.0), limits = c(-0.05, 1.05)) +
cowplot::panel_border() +
ggplot2::geom_vline(xintercept = 0.5, lty = 2, size = 0.5, col = "firebrick4") +
ggplot2::theme(
Expand Down Expand Up @@ -95,7 +95,7 @@ per_chr_baf <- function(haps, filtern = 9, perarm = FALSE, labelclones = FALSE)
ggplot2::geom_histogram(bins = 30, alpha = 0.5) +
ggplot2::facet_wrap(~chrf, scales = "free_y") +
cowplot::theme_cowplot() +
ggplot2::scale_x_continuous(breaks = c(0.0, 0.5, 1.0), limits = c(0, 1.0)) +
ggplot2::scale_x_continuous(breaks = c(0.0, 0.5, 1.0), limits = c(-0.05, 1.05)) +
cowplot::panel_border() +
ggplot2::geom_vline(xintercept = 0.5, lty = 2, size = 0.5, col = "firebrick4") +
ggplot2::theme(
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ Then we can call haplotype specific state per cell:
hscn <- callHaplotypeSpecificCN(CNbins, allele_data)
```

Or alterantively allele specific states:
Or alternatively allele specific states:
```r
hscn <- callAlleleSpecificCN(CNbins, allele_data)
```
Expand Down
9 changes: 6 additions & 3 deletions man/plotBAFperstate.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit aa97c6e

Please sign in to comment.