-
Notifications
You must be signed in to change notification settings - Fork 17
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Wilms-06 update CNV methods: infercnv (2/3) #873
Merged
sjspielman
merged 4 commits into
AlexsLemonade:feature/wilms-tumor-06-azimuth
from
sjspielman:sjspielman/wilms-06_update-infercnv
Nov 12, 2024
Merged
Changes from all commits
Commits
Show all changes
4 commits
Select commit
Hold shift + click to select a range
5569022
Update 06_infercnv.R to actually use the threshold when selecting nor…
sjspielman 4d44715
add testing flag to infercnv script
sjspielman 93636b1
fix meta bug in 06 notebook
sjspielman 8f15a2d
Merge branch 'feature/wilms-tumor-06-azimuth' into sjspielman/wilms-0…
sjspielman File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,7 +4,7 @@ author: "Maud PLASCHKA" | |
date: "`r Sys.Date()`" | ||
params: | ||
scpca_project_id: "SCPCP000006" | ||
sample_id: "SCPCS000184" | ||
sample_id: "SCPCS000179" | ||
seed: 12345 | ||
output: | ||
html_document: | ||
|
@@ -286,9 +286,10 @@ wrapper_explore_hmm <- function(infercnv_obj, cnv_threshold = 1) { | |
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) | ||
|
||
#### CNV score | ||
meta <- [email protected] |> | ||
mutate(has_cnv_score = rowSums(meta[, grepl("has_cnv_chr", colnames(meta))])) |> | ||
mutate(has_cnv_score = case_when( | ||
meta <- [email protected] | ||
meta <- meta |> | ||
dplyr::mutate(has_cnv_score = rowSums(meta[, grepl("has_cnv_chr", colnames(meta))])) |> | ||
dplyr::mutate(has_cnv_score = dplyr::case_when( | ||
has_cnv_score > cnv_threshold ~ "CNV", | ||
has_cnv_score <= cnv_threshold ~ "no CNV" | ||
)) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,18 @@ | ||
#!/usr/bin/env Rscript | ||
|
||
# Run `infercnv` for one sample with or without a healthy reference | ||
# infercnv | ||
# Run `infercnv` for one sample with or without a healthy reference and optionally with HMM | ||
# | ||
# USAGE: | ||
# Rscript 06_infercnv.R \ | ||
# --sample_id SCPCS000179 | ||
# --reference one of none, immune, endothelium, both, pull | ||
# --sample_id SCPCS000179 \ | ||
# --reference <none, immune, endothelium, both, pull> \ | ||
# --HMM <i3, i6, no> | ||
# | ||
# Additional optional arguments include: | ||
# --testing: Use this flag if running with test data, to avoid errors when identifying reference cells which may not be present in test data | ||
# --score_threshold: Annotation prediction score threshold to use when identifying cells to use in reference | ||
# --seed: Integer to set the random seed | ||
|
||
|
||
# OUTPUT : | ||
# For every condition, the `infercnv` object and final `infercnv` heatmap are saved in the corresponding {sample_id}/06_infercnv results subfolder | ||
|
@@ -39,6 +45,18 @@ option_list <- list( | |
default = "i3", | ||
help = "If running an additional HMM model to call CNV, either no, or i3 or i6" | ||
), | ||
make_option( | ||
opt_str = c("--score_threshold"), | ||
type = "numeric", | ||
default = 0.85, | ||
help = "Threshold prediction score from label transfer to consider a normal cell in the reference" | ||
), | ||
make_option( | ||
opt_str = c("--testing"), | ||
action = "store_true", | ||
default = FALSE, | ||
help = "This flag should be specified when test data is being used to override the reference option to not use a reference, due to test data limitations." | ||
), | ||
make_option( | ||
opt_str = c("--seed"), | ||
type = "character", | ||
|
@@ -101,34 +119,45 @@ srat <- readRDS( | |
|
||
stopifnot("Incorrect reference provided" = opts$reference %in% c("none", "immune", "endothelium", "both", "pull")) | ||
|
||
normal_label <- "normal" # label normal cells to use as normal | ||
|
||
if (opts$reference %in% c("both", "endothelium", "immune")) { | ||
if (opts$reference == "both") { | ||
normal_cells <- c("endothelium", "immune") | ||
|
||
# keep only the labels actually present in the annotations to avoid infercnv error | ||
normal_cells <- normal_cells[normal_cells %in% unique([email protected]$fetal_kidney_predicted.compartment)] | ||
} else { | ||
normal_cells <- opts$reference | ||
} | ||
# the total count of normal cells needs to be greater than 1 | ||
total_normal <- sum([email protected]$fetal_kidney_predicted.compartment %in% normal_cells) | ||
stopifnot("There must be more than 1 normal cell to use a reference." = length(normal_cells) > 1) | ||
} else if (opts$reference == "none") { | ||
# If we are testing, do not use normal cells regardless of specified reference | ||
if (opts$testing) { | ||
normal_cells <- NULL | ||
} else if (opts$reference == "pull") { | ||
srat_normal <- readRDS(srat_normal_file) | ||
# we merge the spike-in cells into the `Seurat` object | ||
srat <- merge(srat, srat_normal) | ||
srat <- JoinLayers(srat) # else GetAssayData won't work | ||
# we rename the `fetal_kidney_predicted.compartment` for these cells as "spike" | ||
to_rename <- [email protected]$fetal_kidney_predicted.compartment | ||
names(to_rename) <- rownames([email protected]) | ||
to_rename[grepl("spike", names(to_rename))] <- "spike" | ||
[email protected]$fetal_kidney_predicted.compartment <- to_rename | ||
normal_cells <- "spike" | ||
} else { | ||
normal_cells <- opts$reference | ||
if (opts$reference %in% c("both", "endothelium", "immune")) { | ||
if (opts$reference == "both") { | ||
normal_cells <- c("endothelium", "immune") | ||
} else { | ||
normal_cells <- opts$reference | ||
} | ||
|
||
# Determine which cells pass the threshold and rename them to "normal" | ||
to_rename <- [email protected]$fetal_kidney_predicted.compartment | ||
names(to_rename) <- rownames([email protected]) | ||
to_rename[to_rename %in% normal_cells & | ||
[email protected]$fetal_kidney_predicted.compartment.score > opts$score_threshold] <- normal_label | ||
[email protected]$fetal_kidney_predicted.compartment <- to_rename | ||
normal_cells <- normal_label # now they are called "normal" | ||
|
||
# the total count of normal cells should be be greater than 3 | ||
total_normal <- sum([email protected]$fetal_kidney_predicted.compartment == normal_label) | ||
stopifnot("There must be at least 3 normal cells to use a reference." = total_normal >= 3) | ||
} else if (opts$reference == "none") { | ||
normal_cells <- NULL | ||
} else { # pull | ||
srat_normal <- readRDS(srat_normal_file) | ||
# we merge the spike-in cells into the `Seurat` object | ||
srat <- merge(srat, srat_normal) | ||
srat <- JoinLayers(srat) # else GetAssayData won't work | ||
# we rename the `fetal_kidney_predicted.compartment` for these cells as "spike" | ||
to_rename <- [email protected]$fetal_kidney_predicted.compartment | ||
names(to_rename) <- rownames([email protected]) | ||
to_rename[grepl("spike", names(to_rename))] <- normal_label | ||
[email protected]$fetal_kidney_predicted.compartment <- to_rename | ||
normal_cells <- normal_label | ||
} | ||
} | ||
|
||
# Extract raw counts ----------------------------------------------------------- | ||
|
@@ -138,8 +167,6 @@ counts <- GetAssayData(object = srat, assay = "RNA", layer = "counts") | |
annot_df <- data.frame(condition = as.character(srat$fetal_kidney_predicted.compartment)) | ||
rownames(annot_df) <- colnames(counts) | ||
|
||
|
||
# We only run the CNV HMM prediction model if the reference is "both" | ||
HMM_logical <- TRUE | ||
HMM_type <- opts$HMM | ||
|
||
|
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note that this comment was removed because it is not accurate.