add concatenateFeatures argument

SydneyBioX · Aug 2, 2024 · 40407b1 · 40407b1
1 parent 1ee5b77
commit 40407b1
Show file tree

Hide file tree

Showing 12 changed files with 149 additions and 89 deletions.
diff --git a/R/EntropyMatrix.R b/R/EntropyMatrix.R
@@ -5,7 +5,7 @@
 #'
 #' @param me A Molecule Experiment object.
 #' @param featureTypes A character string specifying the feature type. Supported values include
-#' "sub_sector", "sub_concentric", "super_sector", and "super_concentric".
+#' "subsector", "subconcentric", "supersector", and "superconcentric".
 #' @param nCores Number of cores
 #' @param ... arguments passing to CountsMatrix
 #'
@@ -14,11 +14,11 @@
 #' @examples
 #' data(example_me)
 #' me <- loadBoundaries(me)
-#' ent <- EntropyMatrix(me, c("sub_sector", "sub_concentric", "super_sector", "super_concentric"), nCores = 1)
+#' ent <- EntropyMatrix(me, c("subsector", "subconcentric", "supersector", "superconcentric"), nCores = 1)
 #' lapply(ent, head, n = 4)
 EntropyMatrix <- function(me, featureTypes, nCores = 1, ...) {
   # Ensure featureTypes are valid
-  # if (!all(featureTypes %in% c("sub_sector", "sub_concentric", 'sub_combo', "super_sector", "super_concentric", "super_combo"))) {
+  # if (!all(featureTypes %in% c("subsector", "subconcentric", 'subcombo', "supersector", "superconcentric", "supercombo"))) {
   #   stop("Invalid assayName(s) provided!")
   # }
 

diff --git a/R/EntropyMatrix_utils.R b/R/EntropyMatrix_utils.R
@@ -38,11 +38,11 @@ annuli_counts = function(mat) {
 CountsMatrix <- function(me, assayName, nCores = 1, ...) {
   counts_matrix = MoleculeExperiment::countMolecules(me, moleculesAssay = "detected", boundariesAssay = assayName, matrixOnly = TRUE, nCores = nCores, ...)
 
-  if (assayName %in% c("sub_sector", "super_sector")) {
+  if (assayName %in% c("subsector", "supersector")) {
     return(counts_matrix)
   }
 
-  if (assayName %in% c("sub_concentric", "super_concentric")) {
+  if (assayName %in% c("subconcentric", "superconcentric")) {
     return(annuli_counts(counts_matrix))
   }
 

diff --git a/R/EntropySummarizedExperiment.R b/R/EntropySummarizedExperiment.R
@@ -6,6 +6,8 @@
 #' @param df_list A list of data frames, each containing assay data.
 #' @param me A Molecule Experiment object.
 #' @param includeCounts logical (default FALSE) whether to include gene counts as features
+#' @param concatenateFeatures logical whether to concatenate all the features
+#' into a single assay (default FALSE). If FALSE the output SE object has multiple assays
 #' @param nCores Number of cores (default 1)
 #'
 #' @return A SummarizedExperiment object.
@@ -17,40 +19,54 @@
 #' @examples
 #' data(example_me)
 #' me <- loadBoundaries(me)
-#' ent <- EntropyMatrix(me, c("sub_sector", "sub_concentric", "super_sector", "super_concentric"), nCores = 1)
+#' ent <- EntropyMatrix(me, c("subsector", "subconcentric", "supersector", "superconcentric"), nCores = 1)
 #' se <- EntropySummarizedExperiment(ent, me)
 #' se
-EntropySummarizedExperiment <- function(df_list, me, includeCounts = FALSE, nCores = 1) {
+EntropySummarizedExperiment <- function(df_list, me, includeCounts = FALSE, concatenateFeatures = FALSE, nCores = 1) {
 
   # 1. Assay Data: Using countMolecules function to get assay data.
   # Creating the assay_data
-  assay_data <- make_assay_data(df_list)
+  assay_data <- make_assay_data(df_list, concatenateFeatures = concatenateFeatures)
 
   if (includeCounts) {
-  # Generating the genecount
-  genecount <- as.data.frame(as.matrix(MoleculeExperiment::countMolecules(me,
-                                                                          moleculesAssay = "detected",
-                                                                          boundariesAssay = "cell",
-                                                                          matrixOnly = TRUE,
-                                                                          nCores = nCores)))
-
-  # Adding a prefix to the row names of genecount to differentiate it
-  rownames(genecount) <- paste("genecount", rownames(genecount), sep="_")
-
-  # Rbinding the assay_data and genecount together
-  assay_data <- rbind(assay_data, genecount)
+    # Generating the genecount
+    genecount <- as.data.frame(as.matrix(MoleculeExperiment::countMolecules(me,
+                                                                            moleculesAssay = "detected",
+                                                                            boundariesAssay = "cell",
+                                                                            matrixOnly = TRUE,
+                                                                            nCores = nCores)))
+
+    # Adding a prefix to the row names of genecount to differentiate it
+    rownames(genecount) <- paste("genecount", rownames(genecount), sep="_")
+
+    if (concatenateFeatures) {
+
+      # Rbinding the assay_data and genecount together
+      assay_data <- rbind(assay_data, genecount)
+
+    } else {
+
+      assay_data[["genecount"]] <- genecount
+
+    }
   }
 
-  # 2. Row Data
-  rowData <- data.frame(
-    FeatureCategory = gsub("_.*", "", rownames(assay_data)),
-    FeatureGene = gsub(".*_", "", rownames(assay_data))
-  )
+  if (concatenateFeatures) {
 
-  # Translate "sub" and "super" prefixes to "Subcellular" and "Supercellular" respectively
-  rowData$FeatureCategory <- gsub("^sub", "Subcellular", rowData$FeatureCategory)
-  rowData$FeatureCategory <- gsub("^super", "Supercellular", rowData$FeatureCategory)
-  rowData$FeatureCategory <- gsub("^genecount", "Genecount", rowData$FeatureCategory)
+    # 2. Row Data
+    rowData <- data.frame(
+      FeatureCategory = gsub("_.*", "", rownames(assay_data)),
+      FeatureGene = gsub(".*_", "", rownames(assay_data))
+    )
+
+    # Translate "sub" and "super" prefixes to "Subcellular" and "Supercellular" respectively
+    # rowData$FeatureCategory <- gsub("^sub", "Subcellular", rowData$FeatureCategory)
+    # rowData$FeatureCategory <- gsub("^super", "Supercellular", rowData$FeatureCategory)
+    # rowData$FeatureCategory <- gsub("^genecount", "Genecount", rowData$FeatureCategory)
+
+  } else {
+    rowData = NULL
+  }
 
   # 3. Column Data
   cell_df <- extract_boundaries_and_centroids(me)[[2]]
@@ -65,12 +81,27 @@ EntropySummarizedExperiment <- function(df_list, me, includeCounts = FALSE, nCor
   # Convert Cell column to a format compatible with the columns in assay_data
   colData$Cell <- paste0(colData$Sample_id, ".", colData$Cell)
 
-  # Create the SummarizedExperiment object
-  se <- SummarizedExperiment::SummarizedExperiment(
-    assays = list(spatialFeatures = as.matrix(assay_data)),
-    rowData = rowData,
-    colData = colData
-  )
+  if (concatenateFeatures) {
+    # Create the SummarizedExperiment object
+    se <- SummarizedExperiment::SummarizedExperiment(
+      assays = list(spatialFeatures = as.matrix(assay_data)),
+      rowData = rowData,
+      colData = colData
+    )
+  } else {
+
+    rnames = gsub(".*_", "", rownames(assay_data[[1]]))
+    assay_data <- lapply(assay_data, function(x){
+      rownames(x) <- rnames
+      return(x)
+    })
+
+    se <- SummarizedExperiment::SummarizedExperiment(
+      assays = assay_data,
+      rowData = rowData,
+      colData = colData
+    )
+  }
 
   return(se)
 }
diff --git a/R/EntropySummarizedExperiment_utils.R b/R/EntropySummarizedExperiment_utils.R
@@ -1,11 +1,22 @@
 #' make assay data
 #'
 #' @param df_list A list of data frames
-#' @return A single data frame
-make_assay_data = function(df_list) {
-  do.call(rbind, lapply(names(df_list), function(assayName) {
+#' @param concatenateFeatures logical whether to concatenate features (default FALSE)
+#' @return if concatenateFeatures == TRUE, A single data frame, otherwise a list
+#' containing data frames
+make_assay_data = function(df_list, concatenateFeatures = FALSE) {
+
+  assay_data_list = lapply(names(df_list), function(assayName) {
     df <- df_list[[assayName]]
     rownames(df) <- paste(assayName, rownames(df), sep="_")
     return(df)
-  }))
+  })
+  names(assay_data_list) <- names(df_list)
+
+  if (concatenateFeatures) {
+    return(do.call(rbind, assay_data_list))
+  } else {
+    return(assay_data_list)
+  }
+
 }
diff --git a/R/loadBoundaries_utils.R b/R/loadBoundaries_utils.R
@@ -21,72 +21,70 @@
 #' @param k A numeric value indicating the scaling factor for concentric polygons.
 #'        Defaults to 5.
 #' @return A list containing dataframes for each assay type:
-#' - `sub_sector`: Feature data for sub-sector polygons.
-#' - `sub_concentric`: Feature data for sub-concentric polygons.
-#' - `sub_combo`: Feature data for sub-combo polygons.
-#' - `super_concentric`: Feature data for super-concentric polygons.
-#' - `super_combo`: Feature data for super-combo polygons.
+#' - `subsector`: Feature data for sub-sector polygons.
+#' - `subconcentric`: Feature data for sub-concentric polygons.
+#' - `supersector`: Feature data for super-sector polygons.
+#' - `superconcentric`: Feature data for super-concentric polygons.
 #' @importFrom dplyr group_by mutate arrange select ungroup rowwise distinct %>% bind_rows
 #' @importFrom purrr map_dfr
 #' @importFrom parallel mclapply
-GenerateFeatureData <- function(me, featureTypes = c("sub_sector", "sub_concentric",
-                                                     "super_sector", "super_concentric"), k = 5) {
+GenerateFeatureData <- function(me, featureTypes = c("subsector", "subconcentric",
+                                                     "supersector", "superconcentric"), k = 5) {
   results <- extract_boundaries_and_centroids(me)
   df_circle = results$df_circle
 
   featureTypes <- match.arg(featureTypes, several.ok = TRUE)
 
-  if ("sub_sector" %in% featureTypes) {
-    # For sub-sector
-    sub_sector <- df_circle %>%
+  if ("subsector" %in% featureTypes) {
+    # For subsector
+    subsector <- df_circle %>%
       distinct(segment_id, x_location, y_location, .keep_all = TRUE) %>%
       mutate(angle = atan2(y_location - y_central, x_location - x_central) + pi) %>%
       split(.$segment_id) %>%
       lapply(create_sectors) %>%
       bind_rows() %>%
       dplyr::rename(x_section = x_location_sector, y_section = y_location_sector, area_id = sector_id)
   } else {
-    sub_sector <- NULL
+    subsector <- NULL
   }
 
-  if ("super_sector" %in% featureTypes) {
-    # For super-sector
-    super_sector <- create_sector_df(df_circle)
+  if ("supersector" %in% featureTypes) {
+    # For supersector
+    supersector <- create_sector_df(df_circle)
   } else {
-    super_sector <- NULL
+    supersector <- NULL
   }
 
-  if ("sub_concentric" %in% featureTypes) {
-    # Common for sub-concentric
+  if ("subconcentric" %in% featureTypes) {
+    # Common for subconcentric
     common_scale_factors <- generate_scale_factors_all(k)
     common_scaled_df <- common_scale_factors %>%
       map_dfr(~create_scaled_df_sub(.x, df_circle, k))
-    # For sub-concentric
-    sub_concentric <- common_scaled_df %>%
+    # For subconcentric
+    subconcentric <- common_scaled_df %>%
       select(x_section = x_scaled, y_section = y_scaled, segment_id, sample_id, area_id = concentric_id)
     # modify the area_id column
-    sub_concentric <- sub_concentric %>% mutate(area_id = sapply(area_id, modify_area_id))
+    subconcentric <- subconcentric %>% mutate(area_id = sapply(area_id, modify_area_id))
   } else {
-    sub_concentric <- NULL
+    subconcentric <- NULL
   }
 
-  if ("super_concentric" %in% featureTypes) {
-    # For super-concentric
+  if ("superconcentric" %in% featureTypes) {
+    # For superconcentric
     super_scale_factors <- generate_scale_factors_all_outside(k)
     super_scaled_df <- super_scale_factors %>%
       map_dfr(~create_scaled_df_super(.x, df_circle, k))
-    super_concentric <- super_scaled_df %>%
+    superconcentric <- super_scaled_df %>%
       select(x_section = x_scaled, y_section = y_scaled, segment_id, sample_id, area_id = concentric_id)
     # Modify the area_id column for each table
-    super_concentric <- super_concentric %>% mutate(area_id = sapply(area_id, modify_area_id))
+    superconcentric <- superconcentric %>% mutate(area_id = sapply(area_id, modify_area_id))
   } else {
-    super_concentric <- NULL
+    superconcentric <- NULL
   }
 
   featureData = sapply(featureTypes, get, simplify = FALSE, envir = environment())
 
   return(featureData)
-  # return(list(sub_sector = sub_sector, sub_concentric = sub_concentric, super_sector = super_sector, super_concentric = super_concentric))
 }
 
 #' Create sectors from a given data frame

diff --git a/R/spatialFeatures.R b/R/spatialFeatures.R
@@ -15,10 +15,13 @@
 #' the entropy values into a SummarizedExperiment object.
 #'
 #' @param me A MoleculeExperiment (ME) object
-#' @param featureTypes a character vector listing the types of featureTypes to include
+#' @param featureTypes A character string specifying the feature type. Supported values include
+#' "subsector", "subconcentric", "supersector", and "superconcentric".
 #' @param k A numeric value indicating the number of polygons to calculate entropy (default 5)
 #' @param nCores number of cores for parallel processing (default 1)
 #' @param includeCounts logical (default FALSE) whether to include gene counts as features
+#' @param concatenateFeatures logical whether to concatenate all the features
+#' into a single assay (default FALSE). If FALSE the output SE object has multiple assays
 #' @param ... arguments passed to loadBoundaries and EntropyMatrix
 #' @return A SummarizedExperiment object containing a spatialFeatures assay and cell-level colData
 #' @export
@@ -27,11 +30,12 @@
 #' se <- spatialFeatures(me)
 #' se
 spatialFeatures <- function(me,
-                            featureTypes = c("sub_sector", "sub_concentric",
-                                             "super_sector", "super_concentric"),
+                            featureTypes = c("subsector", "subconcentric",
+                                             "supersector", "superconcentric"),
                             k = 5,
                             nCores = 1,
                             includeCounts = FALSE,
+                            concatenateFeatures = FALSE,
                             ...) {
 
   # step 1 load new boundaries
@@ -41,7 +45,7 @@ spatialFeatures <- function(me,
   ent = EntropyMatrix(me, nCores = nCores, featureTypes = featureTypes, ...)
 
   # step 3 create SummarizedExperiment
-  se = EntropySummarizedExperiment(ent, me, includeCounts = includeCounts, nCores = nCores)
+  se = EntropySummarizedExperiment(ent, me, includeCounts = includeCounts, concatenateFeatures = concatenateFeatures, nCores = nCores)
 
   return(se)
 }
diff --git a/man/EntropyMatrix.Rd b/man/EntropyMatrix.Rd
diff --git a/man/EntropySummarizedExperiment.Rd b/man/EntropySummarizedExperiment.Rd
diff --git a/man/GenerateFeatureData.Rd b/man/GenerateFeatureData.Rd