Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Wilms-06 update CNV methods: infercnv (2/3) #873

Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ author: "Maud PLASCHKA"
date: "`r Sys.Date()`"
params:
scpca_project_id: "SCPCP000006"
sample_id: "SCPCS000184"
sample_id: "SCPCS000179"
seed: 12345
output:
html_document:
Expand Down Expand Up @@ -286,9 +286,10 @@ wrapper_explore_hmm <- function(infercnv_obj, cnv_threshold = 1) {
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

#### CNV score
meta <- [email protected] |>
mutate(has_cnv_score = rowSums(meta[, grepl("has_cnv_chr", colnames(meta))])) |>
mutate(has_cnv_score = case_when(
meta <- [email protected]
meta <- meta |>
dplyr::mutate(has_cnv_score = rowSums(meta[, grepl("has_cnv_chr", colnames(meta))])) |>
dplyr::mutate(has_cnv_score = dplyr::case_when(
has_cnv_score > cnv_threshold ~ "CNV",
has_cnv_score <= cnv_threshold ~ "no CNV"
))
Expand Down
87 changes: 57 additions & 30 deletions analyses/cell-type-wilms-tumor-06/scripts/06_infercnv.R
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
#!/usr/bin/env Rscript

# Run `infercnv` for one sample with or without a healthy reference
# infercnv
# Run `infercnv` for one sample with or without a healthy reference and optionally with HMM
#
# USAGE:
# Rscript 06_infercnv.R \
# --sample_id SCPCS000179
# --reference one of none, immune, endothelium, both, pull
# --sample_id SCPCS000179 \
# --reference <none, immune, endothelium, both, pull> \
# --HMM <i3, i6, no>
#
# Additional optional arguments include:
# --testing: Use this flag if running with test data, to avoid errors when identifying reference cells which may not be present in test data
# --score_threshold: Annotation prediction score threshold to use when identifying cells to use in reference
# --seed: Integer to set the random seed


# OUTPUT :
# For every condition, the `infercnv` object and final `infercnv` heatmap are saved in the corresponding {sample_id}/06_infercnv results subfolder
Expand Down Expand Up @@ -39,6 +45,18 @@ option_list <- list(
default = "i3",
help = "If running an additional HMM model to call CNV, either no, or i3 or i6"
),
make_option(
opt_str = c("--score_threshold"),
type = "numeric",
default = 0.85,
help = "Threshold prediction score from label transfer to consider a normal cell in the reference"
),
make_option(
opt_str = c("--testing"),
action = "store_true",
default = FALSE,
help = "This flag should be specified when test data is being used to override the reference option to not use a reference, due to test data limitations."
),
make_option(
opt_str = c("--seed"),
type = "character",
Expand Down Expand Up @@ -101,34 +119,45 @@ srat <- readRDS(

stopifnot("Incorrect reference provided" = opts$reference %in% c("none", "immune", "endothelium", "both", "pull"))

normal_label <- "normal" # label normal cells to use as normal

if (opts$reference %in% c("both", "endothelium", "immune")) {
if (opts$reference == "both") {
normal_cells <- c("endothelium", "immune")

# keep only the labels actually present in the annotations to avoid infercnv error
normal_cells <- normal_cells[normal_cells %in% unique([email protected]$fetal_kidney_predicted.compartment)]
} else {
normal_cells <- opts$reference
}
# the total count of normal cells needs to be greater than 1
total_normal <- sum([email protected]$fetal_kidney_predicted.compartment %in% normal_cells)
stopifnot("There must be more than 1 normal cell to use a reference." = length(normal_cells) > 1)
} else if (opts$reference == "none") {
# If we are testing, do not use normal cells regardless of specified reference
if (opts$testing) {
normal_cells <- NULL
} else if (opts$reference == "pull") {
srat_normal <- readRDS(srat_normal_file)
# we merge the spike-in cells into the `Seurat` object
srat <- merge(srat, srat_normal)
srat <- JoinLayers(srat) # else GetAssayData won't work
# we rename the `fetal_kidney_predicted.compartment` for these cells as "spike"
to_rename <- [email protected]$fetal_kidney_predicted.compartment
names(to_rename) <- rownames([email protected])
to_rename[grepl("spike", names(to_rename))] <- "spike"
[email protected]$fetal_kidney_predicted.compartment <- to_rename
normal_cells <- "spike"
} else {
normal_cells <- opts$reference
if (opts$reference %in% c("both", "endothelium", "immune")) {
if (opts$reference == "both") {
normal_cells <- c("endothelium", "immune")
} else {
normal_cells <- opts$reference
}

# Determine which cells pass the threshold and rename them to "normal"
to_rename <- [email protected]$fetal_kidney_predicted.compartment
names(to_rename) <- rownames([email protected])
to_rename[to_rename %in% normal_cells &
[email protected]$fetal_kidney_predicted.compartment.score > opts$score_threshold] <- normal_label
[email protected]$fetal_kidney_predicted.compartment <- to_rename
normal_cells <- normal_label # now they are called "normal"

# the total count of normal cells should be be greater than 3
total_normal <- sum([email protected]$fetal_kidney_predicted.compartment == normal_label)
stopifnot("There must be at least 3 normal cells to use a reference." = total_normal >= 3)
} else if (opts$reference == "none") {
normal_cells <- NULL
} else { # pull
srat_normal <- readRDS(srat_normal_file)
# we merge the spike-in cells into the `Seurat` object
srat <- merge(srat, srat_normal)
srat <- JoinLayers(srat) # else GetAssayData won't work
# we rename the `fetal_kidney_predicted.compartment` for these cells as "spike"
to_rename <- [email protected]$fetal_kidney_predicted.compartment
names(to_rename) <- rownames([email protected])
to_rename[grepl("spike", names(to_rename))] <- normal_label
[email protected]$fetal_kidney_predicted.compartment <- to_rename
normal_cells <- normal_label
}
}

# Extract raw counts -----------------------------------------------------------
Expand All @@ -138,8 +167,6 @@ counts <- GetAssayData(object = srat, assay = "RNA", layer = "counts")
annot_df <- data.frame(condition = as.character(srat$fetal_kidney_predicted.compartment))
rownames(annot_df) <- colnames(counts)


# We only run the CNV HMM prediction model if the reference is "both"
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that this comment was removed because it is not accurate.

HMM_logical <- TRUE
HMM_type <- opts$HMM

Expand Down