From 8369881bc0a7cb22a58bd376b4240abd5709e5da Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Wed, 26 Jun 2019 14:16:17 +0000 Subject: [PATCH 1/4] Adding Greg's commits --- aggregate.R | 64 +++++++++++++++++++++++++++++++++++------------------ annotate.R | 10 ++------- collate.R | 58 ++++++------------------------------------------ normalize.R | 9 +------- preselect.R | 4 +++- select.R | 3 +-- 6 files changed, 57 insertions(+), 91 deletions(-) diff --git a/aggregate.R b/aggregate.R index bb1e90c..de6d406 100755 --- a/aggregate.R +++ b/aggregate.R @@ -3,56 +3,78 @@ 'aggregate Usage: - aggregate.R -o + aggregate.R -s -o [-t ] Options: - -h --help Show this screen. - -o --output= per-well aggregated data.' -> doc + -h --help Show this screen. + -s --sqlite_file= Location of the sql_lite file. + -o --output= per-well aggregated data. + -t --sc_type= which sc_type to focus on [default: none]' -> doc suppressWarnings(suppressMessages(library(docopt))) - suppressWarnings(suppressMessages(library(dplyr))) - suppressWarnings(suppressMessages(library(magrittr))) - suppressWarnings(suppressMessages(library(stringr))) opts <- docopt(doc) -db <- src_sqlite(path = opts[["sqlite_file"]]) +sql_file <- opts[["sqlite_file"]] +output_file <- opts[["output"]] +sc_type <- opts[["sc_type"]] + +db <- src_sqlite(path = sql_file) image <- tbl(src = db, "image") %>% select(TableNumber, ImageNumber, Metadata_Plate, Metadata_Well) -aggregate_objects <- function(compartment) { +aggregate_objects <- function(compartment, sc_type = "none") { object <- tbl(src = db, compartment) object %<>% inner_join(image, by = c("TableNumber", "ImageNumber")) + if (sc_type == "isolated") { + object <- object %>% dplyr::filter(Cells_Neighbors_NumberOfNeighbors_Adjacent == 0) + } else if (sc_type == "colony") { + object <- object %>% dplyr::filter(Cells_Neighbors_NumberOfNeighbors_Adjacent >= 4) + } + # compartment tag converts nuclei to ^Nuclei_ compartment_tag <- str_c("^", str_sub(compartment, 1, 1) %>% str_to_upper(), str_sub(compartment, 2), "_") variables <- colnames(object) %>% stringr::str_subset(compartment_tag) - futile.logger::flog.info(str_c("Started aggregating ", compartment)) - cytominer::aggregate( - population = object, - variables = variables, - strata = c("Metadata_Plate", "Metadata_Well"), - operation = "mean" - ) %>% collect() + object <- object %>% + dplyr::as_tibble() %>% + dplyr::group_by_(.dots = c("TableNumber", "ImageNumber", + "ObjectNumber", "Metadata_Plate", + "Metadata_Well")) %>% + dplyr::summarise_at(.funs = 'mean', .vars = variables) %>% + dplyr::ungroup() + return(object) } -aggregated <- - aggregate_objects("cells") %>% +aggregate_cols <- c("TableNumber", "ImageNumber", "ObjectNumber", + "Metadata_Plate", "Metadata_Well") +sc_objects <- + aggregate_objects("cells", + sc_type = sc_type) %>% inner_join(aggregate_objects("cytoplasm"), - by = c("Metadata_Plate", "Metadata_Well")) %>% + by = aggregate_cols) %>% inner_join(aggregate_objects("nuclei"), - by = c("Metadata_Plate", "Metadata_Well")) + by = aggregate_cols) + +futile.logger::flog.info(paste0("Now collapsing by well for ", output_file)) + +variables <- colnames(sc_objects) %>% stringr::str_subset("^Cells|^Nuclei|^Cytoplasm") + +sc_objects <- sc_objects %>% + dplyr::group_by_(.dots = c("Metadata_Plate", "Metadata_Well")) %>% + dplyr::summarize_at(.funs = 'mean', .vars = variables) %>% + dplyr::ungroup() -futile.logger::flog.info(paste0("Writing aggregated to ", opts[["output"]])) +futile.logger::flog.info(paste0("Writing aggregated to ", output_file)) -aggregated %>% readr::write_csv(opts[["output"]]) +sc_objects %>% readr::write_csv(output_file) diff --git a/annotate.R b/annotate.R index d936ed5..c4e1b55 100755 --- a/annotate.R +++ b/annotate.R @@ -24,21 +24,15 @@ suppressWarnings(suppressMessages(library(readr))) opts <- docopt(doc) batch_id <- opts[["batch_id"]] - external_metadata <- opts[["external_metadata"]] - cell_id <- opts[["cell_id"]] - format_broad_cmap <- opts[["format_broad_cmap"]] - plate_id <- opts[["plate_id"]] - perturbation_mode <- opts[["perturbation_mode"]] - metadata_dir <- paste("../..", "metadata", batch_id, sep = "/") +backend_dir <- paste("../..", "backend", batch_id, unlist(strsplit(plate_id, "_"))[1], sep = "/") -backend_dir <- paste("../..", "backend", batch_id, plate_id, sep = "/") - +print(backend_dir) # read profiles and rename column names profiles <- suppressMessages(readr::read_csv(paste(backend_dir, paste0(plate_id, ".csv"), sep = "/"))) diff --git a/collate.R b/collate.R index 59a7e05..ab2951f 100755 --- a/collate.R +++ b/collate.R @@ -19,45 +19,27 @@ Options: -h --help Show this screen.' -> doc suppressWarnings(suppressMessages(library(docopt))) - suppressWarnings(suppressMessages(library(dplyr))) - suppressWarnings(suppressMessages(library(magrittr))) - suppressWarnings(suppressMessages(library(stringr))) -#str <- "-b 2017_12_05_Batch2 -p BR00092655 -c ingest_config.ini -d -r s3://imaging-platform/projects/2015_10_05_DrugRepurposing_AravindSubramanian_GolubLab_Broad/workspace" - -#opts <- docopt(doc, str) - opts <- docopt(doc) batch_id <- opts[["batch_id"]] - column_as_plate <- opts[["column_as_plate"]] - config <- opts[["config"]] - download <- opts[["download"]] - munge <- opts[["munge"]] - overwrite_backend_cache <- opts[["overwrite_backend_cache"]] - pipeline_name <- opts[["pipeline_name"]] - plate_id <- opts[["plate_id"]] - remote_base_dir <- opts[["remote_base_dir"]] - tmpdir <- opts[["tmpdir"]] stopifnot(!download || !is.null(remote_base_dir)) base_dir = "../.." -# str(opts) - input_dir <- file.path(base_dir, "analysis", batch_id, plate_id, pipeline_name) if (download) { @@ -87,15 +69,11 @@ if (!dir.exists(backend_dir)) { dir.create(backend_dir, recursive = TRUE) } if (!dir.exists(cache_backend_dir)) { dir.create(cache_backend_dir, recursive = TRUE) } backend_dir %<>% normalizePath() - cache_backend_dir %<>% normalizePath() - cache_backend_file <- file.path(cache_backend_dir, paste0(plate_id, ".sqlite")) - cache_aggregated_file <- file.path(cache_backend_dir, paste0(plate_id, ".csv")) backend_file <- file.path(backend_dir, paste0(plate_id, ".sqlite")) - aggregated_file <- file.path(backend_dir, paste0(plate_id, ".csv")) if (file.exists(backend_file) & file.exists(aggregated_file)) { @@ -106,8 +84,13 @@ if (file.exists(backend_file) & file.exists(aggregated_file)) { } if (!file.exists(cache_backend_file) | overwrite_backend_cache) { - ingest_cmd <- paste("cytominer-database", "ingest", input_dir, paste0("sqlite:///", cache_backend_file), - "-c", config, ifelse(munge, "--munge", "--no-munge")) + ingest_cmd <- paste("cytominer-database", + "ingest", + input_dir, + paste0("sqlite:///", cache_backend_file), + "-c", + config, + ifelse(munge, "--munge", "--no-munge")) if (file.exists(cache_backend_file)) { file.remove(cache_backend_file) @@ -116,9 +99,7 @@ if (!file.exists(cache_backend_file) | overwrite_backend_cache) { # ingest futile.logger::flog.info("Ingesting...") - system(ingest_cmd) - stopifnot(file.exists(cache_backend_file)) # add a column `Metadata_Plate` if specified @@ -126,80 +107,55 @@ if (!file.exists(cache_backend_file) | overwrite_backend_cache) { if(!is.null(column_as_plate)) { system(paste("sqlite3", cache_backend_file, "'ALTER TABLE Image ADD COLUMN Metadata_Plate TEXT;'")) - system(paste("sqlite3", cache_backend_file, "'UPDATE image SET Metadata_Plate =", column_as_plate, ";'")) } # create index - index_cmd <- paste("sqlite3", cache_backend_file, "< indices.sql") - futile.logger::flog.info("Indexing...") system(index_cmd) - } # create aggregated (even if it already exists) - aggregate_cmd <- paste("./aggregate.R", cache_backend_file, "-o", cache_aggregated_file) - futile.logger::flog.info("Aggregating...") - system(aggregate_cmd) - stopifnot(file.exists(cache_aggregated_file)) move_and_check <- function(src, dst) { file.copy(src, dst) - stopifnot(tools::md5sum(src) == tools::md5sum(dst)) - file.remove(src) - invisible() - } futile.logger::flog.info("Moving...") if (download) { - remote_backend_dir <- file.path(remote_base_dir, "backend", batch_id, plate_id) - remote_backend_file <- file.path(remote_backend_dir, paste0(plate_id, ".sqlite")) - remote_aggregated_file <- file.path(remote_backend_dir, paste0(plate_id, ".csv")) sync_str <- paste("aws s3 cp", cache_backend_file, remote_backend_file, sep = " ") - futile.logger::flog.info("Uploading backend_file ...") - stopifnot(system(sync_str) == 0) - sync_str <- paste("aws s3 cp", cache_aggregated_file, remote_aggregated_file, sep = " ") futile.logger::flog.info("Uploading aggregated_file ...") - stopifnot(system(sync_str) == 0) futile.logger::flog.info("Deleting cache_backend_file ...") - file.remove(cache_backend_file) futile.logger::flog.info("Deleting cache_aggregated_file ...") - file.remove(cache_aggregated_file) futile.logger::flog.info("Deleting input_dir ...") - unlink(input_dir, recursive = TRUE) } else { move_and_check(cache_backend_file, backend_file) - move_and_check(cache_aggregated_file, aggregated_file) - - } diff --git a/normalize.R b/normalize.R index fb17435..a028259 100755 --- a/normalize.R +++ b/normalize.R @@ -15,26 +15,19 @@ Options: -t --tmpdir= Temporary directory [default: /tmp]' -> doc suppressWarnings(suppressMessages(library(docopt))) - suppressWarnings(suppressMessages(library(dplyr))) - suppressWarnings(suppressMessages(library(magrittr))) - suppressWarnings(suppressMessages(library(stringr))) opts <- docopt(doc) batch_id <- opts[["batch_id"]] - sample_single_cell <- opts[["sample_single_cell"]] - plate_id <- opts[["plate_id"]] - operation <- opts[["operation"]] - subset <- opts[["subset"]] # e.g. "Metadata_broad_sample_type == '''control'''" -backend_dir <- paste("../..", "backend", batch_id, plate_id, sep = "/") +backend_dir <- paste("../..", "backend", batch_id, unlist(strsplit(plate_id, "_"))[1], sep = "/") # load profiles profiles <- suppressMessages(readr::read_csv(paste(backend_dir, paste0(plate_id, "_augmented.csv"), sep = "/"))) diff --git a/preselect.R b/preselect.R index 701a3ca..8ad88d9 100755 --- a/preselect.R +++ b/preselect.R @@ -98,7 +98,9 @@ for (operation in operations) { # This is handled differently because there is no direct way yet to do filtering in cytominer # TODO: rewrite this after cytominer has an appropriate filtering function for this testthat::expect_false(is.null(replicates), info="replicates should be specified when performing replicate_correlation") - + + head(df) + print(variables) feature_replicate_correlations <- df %>% cytominer::replicate_correlation( diff --git a/select.R b/select.R index 53e5700..aaa333e 100755 --- a/select.R +++ b/select.R @@ -37,8 +37,7 @@ variables_selected <- Reduce(function(df1, df2) dplyr::inner_join(df1, df2, by ="variable"), .) %>% magrittr::extract2("variable") -backend_dir <- paste("../..", "backend", batch_id, plate_id, sep = "/") - +backend_dir <- paste("../..", "backend", batch_id, unlist(strsplit(plate_id, "_"))[1], sep = "/") profiles <- paste(backend_dir, paste0(plate_id, "_normalized.csv"), sep = "/") profiles_variable_selected <- paste(backend_dir, paste0(plate_id, "_normalized_variable_selected.csv"), sep = "/") From a3675c97fd7290cd1843e920352f1dab8e723353 Mon Sep 17 00:00:00 2001 From: gwaygenomics Date: Thu, 27 Jun 2019 16:26:34 -0400 Subject: [PATCH 2/4] add back empty lines to match repo style --- aggregate.R | 7 ++++++- annotate.R | 14 ++++++++++---- collate.R | 41 +++++++++++++++++++++++++++++++++++++++++ normalize.R | 8 +++++++- preselect.R | 2 -- 5 files changed, 64 insertions(+), 8 deletions(-) diff --git a/aggregate.R b/aggregate.R index de6d406..63a5b2d 100755 --- a/aggregate.R +++ b/aggregate.R @@ -12,14 +12,19 @@ Options: -t --sc_type= which sc_type to focus on [default: none]' -> doc suppressWarnings(suppressMessages(library(docopt))) + suppressWarnings(suppressMessages(library(dplyr))) + suppressWarnings(suppressMessages(library(magrittr))) + suppressWarnings(suppressMessages(library(stringr))) opts <- docopt(doc) sql_file <- opts[["sqlite_file"]] + output_file <- opts[["output"]] + sc_type <- opts[["sc_type"]] db <- src_sqlite(path = sql_file) @@ -47,7 +52,7 @@ aggregate_objects <- function(compartment, sc_type = "none") { object <- object %>% dplyr::as_tibble() %>% - dplyr::group_by_(.dots = c("TableNumber", "ImageNumber", + dplyr::group_by_(.dots = c("TableNumber", "ImageNumber", "ObjectNumber", "Metadata_Plate", "Metadata_Well")) %>% dplyr::summarise_at(.funs = 'mean', .vars = variables) %>% diff --git a/annotate.R b/annotate.R index c4e1b55..e25fe44 100755 --- a/annotate.R +++ b/annotate.R @@ -24,15 +24,21 @@ suppressWarnings(suppressMessages(library(readr))) opts <- docopt(doc) batch_id <- opts[["batch_id"]] + external_metadata <- opts[["external_metadata"]] + cell_id <- opts[["cell_id"]] + format_broad_cmap <- opts[["format_broad_cmap"]] + plate_id <- opts[["plate_id"]] + perturbation_mode <- opts[["perturbation_mode"]] + metadata_dir <- paste("../..", "metadata", batch_id, sep = "/") + backend_dir <- paste("../..", "backend", batch_id, unlist(strsplit(plate_id, "_"))[1], sep = "/") -print(backend_dir) # read profiles and rename column names profiles <- suppressMessages(readr::read_csv(paste(backend_dir, paste0(plate_id, ".csv"), sep = "/"))) @@ -79,13 +85,13 @@ if (format_broad_cmap) { Metadata_pert_mfc_id = Metadata_broad_sample, Metadata_pert_well = Metadata_Well, Metadata_pert_id_vendor = "") - + if ('Metadata_cell_id' %in% names(profiles)) { message('`cell_id` column present in metadata, will not override.') - + } else { profiles %<>% mutate(Metadata_cell_id = cell_id) - + } if (perturbation_mode == "chemical") { diff --git a/collate.R b/collate.R index ab2951f..2c5d415 100755 --- a/collate.R +++ b/collate.R @@ -19,21 +19,33 @@ Options: -h --help Show this screen.' -> doc suppressWarnings(suppressMessages(library(docopt))) + suppressWarnings(suppressMessages(library(dplyr))) + suppressWarnings(suppressMessages(library(magrittr))) + suppressWarnings(suppressMessages(library(stringr))) opts <- docopt(doc) batch_id <- opts[["batch_id"]] + column_as_plate <- opts[["column_as_plate"]] + config <- opts[["config"]] + download <- opts[["download"]] + munge <- opts[["munge"]] + overwrite_backend_cache <- opts[["overwrite_backend_cache"]] + pipeline_name <- opts[["pipeline_name"]] + plate_id <- opts[["plate_id"]] + remote_base_dir <- opts[["remote_base_dir"]] + tmpdir <- opts[["tmpdir"]] stopifnot(!download || !is.null(remote_base_dir)) @@ -69,11 +81,15 @@ if (!dir.exists(backend_dir)) { dir.create(backend_dir, recursive = TRUE) } if (!dir.exists(cache_backend_dir)) { dir.create(cache_backend_dir, recursive = TRUE) } backend_dir %<>% normalizePath() + cache_backend_dir %<>% normalizePath() + cache_backend_file <- file.path(cache_backend_dir, paste0(plate_id, ".sqlite")) + cache_aggregated_file <- file.path(cache_backend_dir, paste0(plate_id, ".csv")) backend_file <- file.path(backend_dir, paste0(plate_id, ".sqlite")) + aggregated_file <- file.path(backend_dir, paste0(plate_id, ".csv")) if (file.exists(backend_file) & file.exists(aggregated_file)) { @@ -99,7 +115,9 @@ if (!file.exists(cache_backend_file) | overwrite_backend_cache) { # ingest futile.logger::flog.info("Ingesting...") + system(ingest_cmd) + stopifnot(file.exists(cache_backend_file)) # add a column `Metadata_Plate` if specified @@ -107,55 +125,78 @@ if (!file.exists(cache_backend_file) | overwrite_backend_cache) { if(!is.null(column_as_plate)) { system(paste("sqlite3", cache_backend_file, "'ALTER TABLE Image ADD COLUMN Metadata_Plate TEXT;'")) + system(paste("sqlite3", cache_backend_file, "'UPDATE image SET Metadata_Plate =", column_as_plate, ";'")) } # create index + index_cmd <- paste("sqlite3", cache_backend_file, "< indices.sql") + futile.logger::flog.info("Indexing...") system(index_cmd) } # create aggregated (even if it already exists) + aggregate_cmd <- paste("./aggregate.R", cache_backend_file, "-o", cache_aggregated_file) + futile.logger::flog.info("Aggregating...") + system(aggregate_cmd) + stopifnot(file.exists(cache_aggregated_file)) move_and_check <- function(src, dst) { file.copy(src, dst) + stopifnot(tools::md5sum(src) == tools::md5sum(dst)) + file.remove(src) + invisible() + } futile.logger::flog.info("Moving...") if (download) { + remote_backend_dir <- file.path(remote_base_dir, "backend", batch_id, plate_id) + remote_backend_file <- file.path(remote_backend_dir, paste0(plate_id, ".sqlite")) + remote_aggregated_file <- file.path(remote_backend_dir, paste0(plate_id, ".csv")) sync_str <- paste("aws s3 cp", cache_backend_file, remote_backend_file, sep = " ") + futile.logger::flog.info("Uploading backend_file ...") + stopifnot(system(sync_str) == 0) + sync_str <- paste("aws s3 cp", cache_aggregated_file, remote_aggregated_file, sep = " ") futile.logger::flog.info("Uploading aggregated_file ...") + stopifnot(system(sync_str) == 0) futile.logger::flog.info("Deleting cache_backend_file ...") + file.remove(cache_backend_file) futile.logger::flog.info("Deleting cache_aggregated_file ...") + file.remove(cache_aggregated_file) futile.logger::flog.info("Deleting input_dir ...") + unlink(input_dir, recursive = TRUE) } else { move_and_check(cache_backend_file, backend_file) + move_and_check(cache_aggregated_file, aggregated_file) + } diff --git a/normalize.R b/normalize.R index a028259..0989d76 100755 --- a/normalize.R +++ b/normalize.R @@ -15,16 +15,23 @@ Options: -t --tmpdir= Temporary directory [default: /tmp]' -> doc suppressWarnings(suppressMessages(library(docopt))) + suppressWarnings(suppressMessages(library(dplyr))) + suppressWarnings(suppressMessages(library(magrittr))) + suppressWarnings(suppressMessages(library(stringr))) opts <- docopt(doc) batch_id <- opts[["batch_id"]] + sample_single_cell <- opts[["sample_single_cell"]] + plate_id <- opts[["plate_id"]] + operation <- opts[["operation"]] + subset <- opts[["subset"]] # e.g. "Metadata_broad_sample_type == '''control'''" backend_dir <- paste("../..", "backend", batch_id, unlist(strsplit(plate_id, "_"))[1], sep = "/") @@ -116,4 +123,3 @@ normalized <- by = metadata) normalized %>% readr::write_csv(paste(backend_dir, paste0(plate_id, "_normalized.csv"), sep = "/")) - diff --git a/preselect.R b/preselect.R index 8ad88d9..27a9858 100755 --- a/preselect.R +++ b/preselect.R @@ -99,8 +99,6 @@ for (operation in operations) { # TODO: rewrite this after cytominer has an appropriate filtering function for this testthat::expect_false(is.null(replicates), info="replicates should be specified when performing replicate_correlation") - head(df) - print(variables) feature_replicate_correlations <- df %>% cytominer::replicate_correlation( From 1340d0ead38666dcdf7b6d95b13532a4c370341a Mon Sep 17 00:00:00 2001 From: gwaygenomics Date: Thu, 27 Jun 2019 17:09:02 -0400 Subject: [PATCH 3/4] modify aggregate to work with single cell option (and to also not break without single cell mode) --- aggregate.R | 71 +++++++++++++++++++++++++++++++++-------------------- 1 file changed, 44 insertions(+), 27 deletions(-) diff --git a/aggregate.R b/aggregate.R index 63a5b2d..12c1f00 100755 --- a/aggregate.R +++ b/aggregate.R @@ -32,7 +32,17 @@ db <- src_sqlite(path = sql_file) image <- tbl(src = db, "image") %>% select(TableNumber, ImageNumber, Metadata_Plate, Metadata_Well) -aggregate_objects <- function(compartment, sc_type = "none") { +# If sc_type is specified, group by different columns +if (sc_type == "none") { + strata_cols <- c("Metadata_Plate", "Metadata_Well") +} else { + strata_cols <- c("TableNumber", "ImageNumber", "ObjectNumber", "Metadata_Plate", + "Metadata_Well") +} + +aggregate_objects <- function(compartment, + strata_cols = c("Metadata_Plate", "Metadata_Well"), + sc_type = "none") { object <- tbl(src = db, compartment) object %<>% inner_join(image, by = c("TableNumber", "ImageNumber")) @@ -50,36 +60,43 @@ aggregate_objects <- function(compartment, sc_type = "none") { variables <- colnames(object) %>% stringr::str_subset(compartment_tag) futile.logger::flog.info(str_c("Started aggregating ", compartment)) - object <- object %>% - dplyr::as_tibble() %>% - dplyr::group_by_(.dots = c("TableNumber", "ImageNumber", - "ObjectNumber", "Metadata_Plate", - "Metadata_Well")) %>% - dplyr::summarise_at(.funs = 'mean', .vars = variables) %>% - dplyr::ungroup() + cytominer::aggregate( + population = object, + variables = variables, + strata = strata_cols, + operation = "mean" + ) %>% collect() - return(object) } -aggregate_cols <- c("TableNumber", "ImageNumber", "ObjectNumber", - "Metadata_Plate", "Metadata_Well") -sc_objects <- - aggregate_objects("cells", +aggregated <- + aggregate_objects(compartment = "cells", + strata_cols = strata_cols, sc_type = sc_type) %>% - inner_join(aggregate_objects("cytoplasm"), - by = aggregate_cols) %>% - inner_join(aggregate_objects("nuclei"), - by = aggregate_cols) - -futile.logger::flog.info(paste0("Now collapsing by well for ", output_file)) - -variables <- colnames(sc_objects) %>% stringr::str_subset("^Cells|^Nuclei|^Cytoplasm") - -sc_objects <- sc_objects %>% - dplyr::group_by_(.dots = c("Metadata_Plate", "Metadata_Well")) %>% - dplyr::summarize_at(.funs = 'mean', .vars = variables) %>% - dplyr::ungroup() + inner_join( + aggregate_objects(compartment = "cytoplasm", + strata_cols = strata_cols, + sc_type = sc_type), + by = strata_cols + ) %>% + inner_join( + aggregate_objects(compartment = "nuclei", + strata_cols = strata_cols, + sc_type = sc_type), + by = strata_cols + ) + +if (sc_type != "none") { + futile.logger::flog.info( + paste0("Now collapsing single cell by well for ", output_file) + ) + + aggregated <- aggregated %>% + dplyr::group_by_(.dots = c("Metadata_Plate", "Metadata_Well")) %>% + dplyr::summarize_at(.funs = 'mean', .vars = variables) %>% + dplyr::ungroup() +} futile.logger::flog.info(paste0("Writing aggregated to ", output_file)) -sc_objects %>% readr::write_csv(output_file) +aggregated %>% readr::write_csv(output_file) From 3b72f6c53e7d8a95d89c8740ddddb36be8a0971b Mon Sep 17 00:00:00 2001 From: Shantanu Singh Date: Fri, 18 Oct 2019 01:52:22 -0400 Subject: [PATCH 4/4] Cast variables to double in population data frame --- normalize.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/normalize.R b/normalize.R index 0989d76..b5a1990 100755 --- a/normalize.R +++ b/normalize.R @@ -103,7 +103,7 @@ normalize_profiles <- function(compartment) { normalized <- cytominer::normalize( - population = load_profiles(compartment = compartment), + population = load_profiles(compartment = compartment) %>% mutate_at(variables, as.double), variables = variables, strata = c("Metadata_Plate"), sample = sample,