-
Notifications
You must be signed in to change notification settings - Fork 1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[WIP] Option for isolated and colony profiles #30
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,11 +3,13 @@ | |
'aggregate | ||
|
||
Usage: | ||
aggregate.R <sqlite_file> -o <file> | ||
aggregate.R -s <sqlite_file> -o <file> [-t <sc_type>] | ||
|
||
Options: | ||
-h --help Show this screen. | ||
-o <file> --output=<file> per-well aggregated data.' -> doc | ||
-h --help Show this screen. | ||
-s <sqlite_file> --sqlite_file=<sqlite_file> Location of the sql_lite file. | ||
-o <file> --output=<file> per-well aggregated data. | ||
-t <sc_type> --sc_type=<sc_type> which sc_type to focus on [default: none]' -> doc | ||
|
||
suppressWarnings(suppressMessages(library(docopt))) | ||
|
||
|
@@ -19,40 +21,82 @@ suppressWarnings(suppressMessages(library(stringr))) | |
|
||
opts <- docopt(doc) | ||
|
||
db <- src_sqlite(path = opts[["sqlite_file"]]) | ||
sql_file <- opts[["sqlite_file"]] | ||
|
||
output_file <- opts[["output"]] | ||
|
||
sc_type <- opts[["sc_type"]] | ||
|
||
db <- src_sqlite(path = sql_file) | ||
|
||
image <- tbl(src = db, "image") %>% | ||
select(TableNumber, ImageNumber, Metadata_Plate, Metadata_Well) | ||
|
||
aggregate_objects <- function(compartment) { | ||
# If sc_type is specified, group by different columns | ||
if (sc_type == "none") { | ||
strata_cols <- c("Metadata_Plate", "Metadata_Well") | ||
} else { | ||
strata_cols <- c("TableNumber", "ImageNumber", "ObjectNumber", "Metadata_Plate", | ||
"Metadata_Well") | ||
} | ||
|
||
aggregate_objects <- function(compartment, | ||
strata_cols = c("Metadata_Plate", "Metadata_Well"), | ||
sc_type = "none") { | ||
object <- tbl(src = db, compartment) | ||
|
||
object %<>% inner_join(image, by = c("TableNumber", "ImageNumber")) | ||
|
||
if (sc_type == "isolated") { | ||
object <- object %>% dplyr::filter(Cells_Neighbors_NumberOfNeighbors_Adjacent == 0) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @gwaygenomics This wouldn't work when There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I originally wrote this (see below), but then realized I am wrong. This may have been my original plan, but I do not remember 100% you are correct, this will fail as currently implemented! This is because we're aggregating compartment by compartment. If we merge everything first and then aggregate, it should work. It seems like some version of this must have worked at one point (because the call on L72 would have errored out for other non-cell compartments). Original thought that is wrongIf I'm remembering correctly, Since we're doing an inner join on L72, this should take care of filtering for other compartments |
||
} else if (sc_type == "colony") { | ||
object <- object %>% dplyr::filter(Cells_Neighbors_NumberOfNeighbors_Adjacent >= 4) | ||
} | ||
|
||
# compartment tag converts nuclei to ^Nuclei_ | ||
compartment_tag <- | ||
str_c("^", str_sub(compartment, 1, 1) %>% str_to_upper(), str_sub(compartment, 2), "_") | ||
|
||
variables <- colnames(object) %>% stringr::str_subset(compartment_tag) | ||
|
||
futile.logger::flog.info(str_c("Started aggregating ", compartment)) | ||
|
||
cytominer::aggregate( | ||
population = object, | ||
variables = variables, | ||
strata = c("Metadata_Plate", "Metadata_Well"), | ||
strata = strata_cols, | ||
operation = "mean" | ||
) %>% collect() | ||
|
||
} | ||
|
||
aggregated <- | ||
aggregate_objects("cells") %>% | ||
inner_join(aggregate_objects("cytoplasm"), | ||
by = c("Metadata_Plate", "Metadata_Well")) %>% | ||
inner_join(aggregate_objects("nuclei"), | ||
by = c("Metadata_Plate", "Metadata_Well")) | ||
aggregate_objects(compartment = "cells", | ||
strata_cols = strata_cols, | ||
sc_type = sc_type) %>% | ||
inner_join( | ||
aggregate_objects(compartment = "cytoplasm", | ||
strata_cols = strata_cols, | ||
sc_type = sc_type), | ||
by = strata_cols | ||
) %>% | ||
inner_join( | ||
aggregate_objects(compartment = "nuclei", | ||
strata_cols = strata_cols, | ||
sc_type = sc_type), | ||
by = strata_cols | ||
) | ||
|
||
if (sc_type != "none") { | ||
futile.logger::flog.info( | ||
paste0("Now collapsing single cell by well for ", output_file) | ||
) | ||
|
||
aggregated <- aggregated %>% | ||
dplyr::group_by_(.dots = c("Metadata_Plate", "Metadata_Well")) %>% | ||
dplyr::summarize_at(.funs = 'mean', .vars = variables) %>% | ||
dplyr::ungroup() | ||
} | ||
|
||
futile.logger::flog.info(paste0("Writing aggregated to ", opts[["output"]])) | ||
futile.logger::flog.info(paste0("Writing aggregated to ", output_file)) | ||
|
||
aggregated %>% readr::write_csv(opts[["output"]]) | ||
aggregated %>% readr::write_csv(output_file) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -37,7 +37,7 @@ perturbation_mode <- opts[["perturbation_mode"]] | |
|
||
metadata_dir <- paste("../..", "metadata", batch_id, sep = "/") | ||
|
||
backend_dir <- paste("../..", "backend", batch_id, plate_id, sep = "/") | ||
backend_dir <- paste("../..", "backend", batch_id, unlist(strsplit(plate_id, "_"))[1], sep = "/") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @gwaygenomics Do you know why the string is being split? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As we discussed on the brief chat, indeed, it looks like I am splitting the plate ID because I am inputing the plate ID with the This is a result of the output from the modified This will definitely break in situations where the plate ID has an underscore. Will need to address! |
||
|
||
# read profiles and rename column names | ||
|
||
|
@@ -85,13 +85,13 @@ if (format_broad_cmap) { | |
Metadata_pert_mfc_id = Metadata_broad_sample, | ||
Metadata_pert_well = Metadata_Well, | ||
Metadata_pert_id_vendor = "") | ||
|
||
if ('Metadata_cell_id' %in% names(profiles)) { | ||
message('`cell_id` column present in metadata, will not override.') | ||
|
||
} else { | ||
profiles %<>% mutate(Metadata_cell_id = cell_id) | ||
|
||
} | ||
|
||
if (perturbation_mode == "chemical") { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -34,7 +34,7 @@ operation <- opts[["operation"]] | |
|
||
subset <- opts[["subset"]] # e.g. "Metadata_broad_sample_type == '''control'''" | ||
|
||
backend_dir <- paste("../..", "backend", batch_id, plate_id, sep = "/") | ||
backend_dir <- paste("../..", "backend", batch_id, unlist(strsplit(plate_id, "_"))[1], sep = "/") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This can probably go (same as before) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 |
||
|
||
# load profiles | ||
profiles <- suppressMessages(readr::read_csv(paste(backend_dir, paste0(plate_id, "_augmented.csv"), sep = "/"))) | ||
|
@@ -103,7 +103,7 @@ normalize_profiles <- function(compartment) { | |
|
||
normalized <- | ||
cytominer::normalize( | ||
population = load_profiles(compartment = compartment), | ||
population = load_profiles(compartment = compartment) %>% mutate_at(variables, as.double), | ||
variables = variables, | ||
strata = c("Metadata_Plate"), | ||
sample = sample, | ||
|
@@ -123,4 +123,3 @@ normalized <- | |
by = metadata) | ||
|
||
normalized %>% readr::write_csv(paste(backend_dir, paste0(plate_id, "_normalized.csv"), sep = "/")) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -98,7 +98,7 @@ for (operation in operations) { | |
# This is handled differently because there is no direct way yet to do filtering in cytominer | ||
# TODO: rewrite this after cytominer has an appropriate filtering function for this | ||
testthat::expect_false(is.null(replicates), info="replicates should be specified when performing replicate_correlation") | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The print debugging can go There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. addressed in a3675c9 |
||
feature_replicate_correlations <- | ||
df %>% | ||
cytominer::replicate_correlation( | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -37,8 +37,7 @@ variables_selected <- | |
Reduce(function(df1, df2) dplyr::inner_join(df1, df2, by ="variable"), .) %>% | ||
magrittr::extract2("variable") | ||
|
||
backend_dir <- paste("../..", "backend", batch_id, plate_id, sep = "/") | ||
|
||
backend_dir <- paste("../..", "backend", batch_id, unlist(strsplit(plate_id, "_"))[1], sep = "/") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This can probably go (same as before) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 |
||
profiles <- paste(backend_dir, paste0(plate_id, "_normalized.csv"), sep = "/") | ||
|
||
profiles_variable_selected <- paste(backend_dir, paste0(plate_id, "_normalized_variable_selected.csv"), sep = "/") | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
after reviewing, I don't think this change is ready for prime time. A couple more conditional statements that only behave when
sc_type
is specified are required.