Skip to content

Commit

Permalink
add concatenateFeatures argument
Browse files Browse the repository at this point in the history
  • Loading branch information
shazanfar committed Aug 2, 2024
1 parent 1ee5b77 commit 40407b1
Show file tree
Hide file tree
Showing 12 changed files with 149 additions and 89 deletions.
6 changes: 3 additions & 3 deletions R/EntropyMatrix.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#'
#' @param me A Molecule Experiment object.
#' @param featureTypes A character string specifying the feature type. Supported values include
#' "sub_sector", "sub_concentric", "super_sector", and "super_concentric".
#' "subsector", "subconcentric", "supersector", and "superconcentric".
#' @param nCores Number of cores
#' @param ... arguments passing to CountsMatrix
#'
Expand All @@ -14,11 +14,11 @@
#' @examples
#' data(example_me)
#' me <- loadBoundaries(me)
#' ent <- EntropyMatrix(me, c("sub_sector", "sub_concentric", "super_sector", "super_concentric"), nCores = 1)
#' ent <- EntropyMatrix(me, c("subsector", "subconcentric", "supersector", "superconcentric"), nCores = 1)
#' lapply(ent, head, n = 4)
EntropyMatrix <- function(me, featureTypes, nCores = 1, ...) {
# Ensure featureTypes are valid
# if (!all(featureTypes %in% c("sub_sector", "sub_concentric", 'sub_combo', "super_sector", "super_concentric", "super_combo"))) {
# if (!all(featureTypes %in% c("subsector", "subconcentric", 'subcombo', "supersector", "superconcentric", "supercombo"))) {
# stop("Invalid assayName(s) provided!")
# }

Expand Down
4 changes: 2 additions & 2 deletions R/EntropyMatrix_utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,11 @@ annuli_counts = function(mat) {
CountsMatrix <- function(me, assayName, nCores = 1, ...) {
counts_matrix = MoleculeExperiment::countMolecules(me, moleculesAssay = "detected", boundariesAssay = assayName, matrixOnly = TRUE, nCores = nCores, ...)

if (assayName %in% c("sub_sector", "super_sector")) {
if (assayName %in% c("subsector", "supersector")) {
return(counts_matrix)
}

if (assayName %in% c("sub_concentric", "super_concentric")) {
if (assayName %in% c("subconcentric", "superconcentric")) {
return(annuli_counts(counts_matrix))
}

Expand Down
91 changes: 61 additions & 30 deletions R/EntropySummarizedExperiment.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
#' @param df_list A list of data frames, each containing assay data.
#' @param me A Molecule Experiment object.
#' @param includeCounts logical (default FALSE) whether to include gene counts as features
#' @param concatenateFeatures logical whether to concatenate all the features
#' into a single assay (default FALSE). If FALSE the output SE object has multiple assays
#' @param nCores Number of cores (default 1)
#'
#' @return A SummarizedExperiment object.
Expand All @@ -17,40 +19,54 @@
#' @examples
#' data(example_me)
#' me <- loadBoundaries(me)
#' ent <- EntropyMatrix(me, c("sub_sector", "sub_concentric", "super_sector", "super_concentric"), nCores = 1)
#' ent <- EntropyMatrix(me, c("subsector", "subconcentric", "supersector", "superconcentric"), nCores = 1)
#' se <- EntropySummarizedExperiment(ent, me)
#' se
EntropySummarizedExperiment <- function(df_list, me, includeCounts = FALSE, nCores = 1) {
EntropySummarizedExperiment <- function(df_list, me, includeCounts = FALSE, concatenateFeatures = FALSE, nCores = 1) {

# 1. Assay Data: Using countMolecules function to get assay data.
# Creating the assay_data
assay_data <- make_assay_data(df_list)
assay_data <- make_assay_data(df_list, concatenateFeatures = concatenateFeatures)

if (includeCounts) {
# Generating the genecount
genecount <- as.data.frame(as.matrix(MoleculeExperiment::countMolecules(me,
moleculesAssay = "detected",
boundariesAssay = "cell",
matrixOnly = TRUE,
nCores = nCores)))

# Adding a prefix to the row names of genecount to differentiate it
rownames(genecount) <- paste("genecount", rownames(genecount), sep="_")

# Rbinding the assay_data and genecount together
assay_data <- rbind(assay_data, genecount)
# Generating the genecount
genecount <- as.data.frame(as.matrix(MoleculeExperiment::countMolecules(me,
moleculesAssay = "detected",
boundariesAssay = "cell",
matrixOnly = TRUE,
nCores = nCores)))

# Adding a prefix to the row names of genecount to differentiate it
rownames(genecount) <- paste("genecount", rownames(genecount), sep="_")

if (concatenateFeatures) {

# Rbinding the assay_data and genecount together
assay_data <- rbind(assay_data, genecount)

} else {

assay_data[["genecount"]] <- genecount

}
}

# 2. Row Data
rowData <- data.frame(
FeatureCategory = gsub("_.*", "", rownames(assay_data)),
FeatureGene = gsub(".*_", "", rownames(assay_data))
)
if (concatenateFeatures) {

# Translate "sub" and "super" prefixes to "Subcellular" and "Supercellular" respectively
rowData$FeatureCategory <- gsub("^sub", "Subcellular", rowData$FeatureCategory)
rowData$FeatureCategory <- gsub("^super", "Supercellular", rowData$FeatureCategory)
rowData$FeatureCategory <- gsub("^genecount", "Genecount", rowData$FeatureCategory)
# 2. Row Data
rowData <- data.frame(
FeatureCategory = gsub("_.*", "", rownames(assay_data)),
FeatureGene = gsub(".*_", "", rownames(assay_data))
)

# Translate "sub" and "super" prefixes to "Subcellular" and "Supercellular" respectively
# rowData$FeatureCategory <- gsub("^sub", "Subcellular", rowData$FeatureCategory)
# rowData$FeatureCategory <- gsub("^super", "Supercellular", rowData$FeatureCategory)
# rowData$FeatureCategory <- gsub("^genecount", "Genecount", rowData$FeatureCategory)

} else {
rowData = NULL
}

# 3. Column Data
cell_df <- extract_boundaries_and_centroids(me)[[2]]
Expand All @@ -65,12 +81,27 @@ EntropySummarizedExperiment <- function(df_list, me, includeCounts = FALSE, nCor
# Convert Cell column to a format compatible with the columns in assay_data
colData$Cell <- paste0(colData$Sample_id, ".", colData$Cell)

# Create the SummarizedExperiment object
se <- SummarizedExperiment::SummarizedExperiment(
assays = list(spatialFeatures = as.matrix(assay_data)),
rowData = rowData,
colData = colData
)
if (concatenateFeatures) {
# Create the SummarizedExperiment object
se <- SummarizedExperiment::SummarizedExperiment(
assays = list(spatialFeatures = as.matrix(assay_data)),
rowData = rowData,
colData = colData
)
} else {

rnames = gsub(".*_", "", rownames(assay_data[[1]]))
assay_data <- lapply(assay_data, function(x){
rownames(x) <- rnames
return(x)
})

se <- SummarizedExperiment::SummarizedExperiment(
assays = assay_data,
rowData = rowData,
colData = colData
)
}

return(se)
}
19 changes: 15 additions & 4 deletions R/EntropySummarizedExperiment_utils.R
Original file line number Diff line number Diff line change
@@ -1,11 +1,22 @@
#' make assay data
#'
#' @param df_list A list of data frames
#' @return A single data frame
make_assay_data = function(df_list) {
do.call(rbind, lapply(names(df_list), function(assayName) {
#' @param concatenateFeatures logical whether to concatenate features (default FALSE)
#' @return if concatenateFeatures == TRUE, A single data frame, otherwise a list
#' containing data frames
make_assay_data = function(df_list, concatenateFeatures = FALSE) {

assay_data_list = lapply(names(df_list), function(assayName) {
df <- df_list[[assayName]]
rownames(df) <- paste(assayName, rownames(df), sep="_")
return(df)
}))
})
names(assay_data_list) <- names(df_list)

if (concatenateFeatures) {
return(do.call(rbind, assay_data_list))
} else {
return(assay_data_list)
}

}
52 changes: 25 additions & 27 deletions R/loadBoundaries_utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,72 +21,70 @@
#' @param k A numeric value indicating the scaling factor for concentric polygons.
#' Defaults to 5.
#' @return A list containing dataframes for each assay type:
#' - `sub_sector`: Feature data for sub-sector polygons.
#' - `sub_concentric`: Feature data for sub-concentric polygons.
#' - `sub_combo`: Feature data for sub-combo polygons.
#' - `super_concentric`: Feature data for super-concentric polygons.
#' - `super_combo`: Feature data for super-combo polygons.
#' - `subsector`: Feature data for sub-sector polygons.
#' - `subconcentric`: Feature data for sub-concentric polygons.
#' - `supersector`: Feature data for super-sector polygons.
#' - `superconcentric`: Feature data for super-concentric polygons.
#' @importFrom dplyr group_by mutate arrange select ungroup rowwise distinct %>% bind_rows
#' @importFrom purrr map_dfr
#' @importFrom parallel mclapply
GenerateFeatureData <- function(me, featureTypes = c("sub_sector", "sub_concentric",
"super_sector", "super_concentric"), k = 5) {
GenerateFeatureData <- function(me, featureTypes = c("subsector", "subconcentric",
"supersector", "superconcentric"), k = 5) {
results <- extract_boundaries_and_centroids(me)
df_circle = results$df_circle

featureTypes <- match.arg(featureTypes, several.ok = TRUE)

if ("sub_sector" %in% featureTypes) {
# For sub-sector
sub_sector <- df_circle %>%
if ("subsector" %in% featureTypes) {
# For subsector
subsector <- df_circle %>%
distinct(segment_id, x_location, y_location, .keep_all = TRUE) %>%
mutate(angle = atan2(y_location - y_central, x_location - x_central) + pi) %>%
split(.$segment_id) %>%
lapply(create_sectors) %>%
bind_rows() %>%
dplyr::rename(x_section = x_location_sector, y_section = y_location_sector, area_id = sector_id)
} else {
sub_sector <- NULL
subsector <- NULL
}

if ("super_sector" %in% featureTypes) {
# For super-sector
super_sector <- create_sector_df(df_circle)
if ("supersector" %in% featureTypes) {
# For supersector
supersector <- create_sector_df(df_circle)
} else {
super_sector <- NULL
supersector <- NULL
}

if ("sub_concentric" %in% featureTypes) {
# Common for sub-concentric
if ("subconcentric" %in% featureTypes) {
# Common for subconcentric
common_scale_factors <- generate_scale_factors_all(k)
common_scaled_df <- common_scale_factors %>%
map_dfr(~create_scaled_df_sub(.x, df_circle, k))
# For sub-concentric
sub_concentric <- common_scaled_df %>%
# For subconcentric
subconcentric <- common_scaled_df %>%
select(x_section = x_scaled, y_section = y_scaled, segment_id, sample_id, area_id = concentric_id)
# modify the area_id column
sub_concentric <- sub_concentric %>% mutate(area_id = sapply(area_id, modify_area_id))
subconcentric <- subconcentric %>% mutate(area_id = sapply(area_id, modify_area_id))
} else {
sub_concentric <- NULL
subconcentric <- NULL
}

if ("super_concentric" %in% featureTypes) {
# For super-concentric
if ("superconcentric" %in% featureTypes) {
# For superconcentric
super_scale_factors <- generate_scale_factors_all_outside(k)
super_scaled_df <- super_scale_factors %>%
map_dfr(~create_scaled_df_super(.x, df_circle, k))
super_concentric <- super_scaled_df %>%
superconcentric <- super_scaled_df %>%
select(x_section = x_scaled, y_section = y_scaled, segment_id, sample_id, area_id = concentric_id)
# Modify the area_id column for each table
super_concentric <- super_concentric %>% mutate(area_id = sapply(area_id, modify_area_id))
superconcentric <- superconcentric %>% mutate(area_id = sapply(area_id, modify_area_id))
} else {
super_concentric <- NULL
superconcentric <- NULL
}

featureData = sapply(featureTypes, get, simplify = FALSE, envir = environment())

return(featureData)
# return(list(sub_sector = sub_sector, sub_concentric = sub_concentric, super_sector = super_sector, super_concentric = super_concentric))
}

#' Create sectors from a given data frame
Expand Down
12 changes: 8 additions & 4 deletions R/spatialFeatures.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,13 @@
#' the entropy values into a SummarizedExperiment object.
#'
#' @param me A MoleculeExperiment (ME) object
#' @param featureTypes a character vector listing the types of featureTypes to include
#' @param featureTypes A character string specifying the feature type. Supported values include
#' "subsector", "subconcentric", "supersector", and "superconcentric".
#' @param k A numeric value indicating the number of polygons to calculate entropy (default 5)
#' @param nCores number of cores for parallel processing (default 1)
#' @param includeCounts logical (default FALSE) whether to include gene counts as features
#' @param concatenateFeatures logical whether to concatenate all the features
#' into a single assay (default FALSE). If FALSE the output SE object has multiple assays
#' @param ... arguments passed to loadBoundaries and EntropyMatrix
#' @return A SummarizedExperiment object containing a spatialFeatures assay and cell-level colData
#' @export
Expand All @@ -27,11 +30,12 @@
#' se <- spatialFeatures(me)
#' se
spatialFeatures <- function(me,
featureTypes = c("sub_sector", "sub_concentric",
"super_sector", "super_concentric"),
featureTypes = c("subsector", "subconcentric",
"supersector", "superconcentric"),
k = 5,
nCores = 1,
includeCounts = FALSE,
concatenateFeatures = FALSE,
...) {

# step 1 load new boundaries
Expand All @@ -41,7 +45,7 @@ spatialFeatures <- function(me,
ent = EntropyMatrix(me, nCores = nCores, featureTypes = featureTypes, ...)

# step 3 create SummarizedExperiment
se = EntropySummarizedExperiment(ent, me, includeCounts = includeCounts, nCores = nCores)
se = EntropySummarizedExperiment(ent, me, includeCounts = includeCounts, concatenateFeatures = concatenateFeatures, nCores = nCores)

return(se)
}
4 changes: 2 additions & 2 deletions man/EntropyMatrix.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 11 additions & 2 deletions man/EntropySummarizedExperiment.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 5 additions & 6 deletions man/GenerateFeatureData.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 40407b1

Please sign in to comment.