fixes for bioc submission

SydneyBioX · Sep 19, 2024 · f126731 · f126731
1 parent e1677e3
commit f126731
Show file tree

Hide file tree

Showing 21 changed files with 298 additions and 1,366 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,27 +1,36 @@
 Package: SpatialFeatures
 Title: Entropy-based subcellular and supercellular features for molecule-resolved spatial omics datasets
-Version: 0.0.4
+Version: 0.0.5
 Authors@R: c(
-    person("Shila", "Ghazanfar", email = "[email protected]", role = c("aut", "cre", "ctb")),
+    person("Shila", "Ghazanfar", email = "[email protected]",
+    role = c("aut", "cre", "ctb"),comment = c(ORCID = "0000-0001-7861-6997")),
     person("Guan", "Gui", role = "ctb")
     )
-Description: This package uses molecule-level information to extract new cell-level features as an alternative to 
-  simply calculating gene counts. By using four categories, sub-sector, super-sector, super-sector and super-concentric
-  segmentations of cells, SpatialFeatures then uses entropy as a metric to arrive at a cell-by-gene level feature.
+Description: This package uses molecule-level information to extract new 
+  cell-level features as an alternative to simply calculating gene counts. By 
+  using four categories, sub-sector, sub-concentric, super-sector and 
+  super-concentric segmentations of cells, SpatialFeatures then uses entropy 
+  as a metric to arrive at a cell-by-gene level feature. Overall, this means 
+  that we can extract more nuanced information from molecule-resolved spatial
+  gene expression for further downstream analysis with SingleCellExperiment.
 License: GPL-2
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.3.1
+RoxygenNote: 7.3.2
 Depends: R (>= 4.4.0),
     MoleculeExperiment,
-    SummarizedExperiment,
+    SingleCellExperiment,
     dplyr,
     purrr,
     parallel
-Imports: terra, RANN, methods
+Imports:
+    terra,
+    rlang
 Suggests:
     knitr,
+    ggplot2,
     BiocStyle
 VignetteBuilder: knitr
-biocViews: Spatial
+biocViews: Spatial, GeneExpression
 URL: https://sydneybiox.github.io/SpatialFeatures/
+BugReports: https://github.com/sydneybiox/SpatialFeatures/issues
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,15 +1,14 @@
 # Generated by roxygen2: do not edit by hand
 
 export(EntropyMatrix)
-export(EntropySummarizedExperiment)
+export(EntropySingleCellExperiment)
 export(loadBoundaries)
 export(spatialFeatures)
-import(SummarizedExperiment)
+import(SingleCellExperiment)
 importFrom(MoleculeExperiment,boundaries)
 importFrom(MoleculeExperiment,countMolecules)
 importFrom(MoleculeExperiment,dataframeToMEList)
 importFrom(dplyr,"%>%")
-importFrom(dplyr,arrange)
 importFrom(dplyr,bind_rows)
 importFrom(dplyr,distinct)
 importFrom(dplyr,group_by)
@@ -19,5 +18,7 @@ importFrom(dplyr,select)
 importFrom(dplyr,ungroup)
 importFrom(parallel,mclapply)
 importFrom(purrr,map_dfr)
+importFrom(rlang,.data)
 importFrom(terra,diff)
 importFrom(terra,t)
+importFrom(utils,head)
diff --git a/NEWS.md b/NEWS.md
@@ -0,0 +1,2 @@
+# SpatialFeatures v0.0.5 (2024-09-19)
++ Submitted to Bioconductor.
diff --git a/R/EntropyMatrix.R b/R/EntropyMatrix.R
@@ -1,30 +1,37 @@
 #' Calculate the Entropy of a Counts Matrix
 #'
 #' @description
-#' This function computes the entropy of a counts matrix from a Molecule Experiment object based on the given assay type.
+#' This function computes the entropy of a counts matrix from a
+#' MoleculeExperiment object based on the given assay type.
 #'
 #' @param me A Molecule Experiment object.
-#' @param featureTypes A character string specifying the feature type. Supported values include
+#' @param featureTypes A character string specifying the feature type.
+#' Supported values include
 #' "subsector", "subconcentric", "supersector", and "superconcentric".
 #' @param nCores Number of cores
 #' @param ... arguments passing to CountsMatrix
 #'
-#' @return A matrix representing the entropy values corresponding to the given assay type.
+#' @return A matrix representing the entropy values corresponding to
+#' the given assay type.
 #' @export
 #' @examples
 #' data(example_me)
 #' me <- loadBoundaries(me)
-#' ent <- EntropyMatrix(me, c("subsector", "subconcentric", "supersector", "superconcentric"), nCores = 1)
+#' ent <- EntropyMatrix(me, c("subsector", "subconcentric",
+#' "supersector", "superconcentric"), nCores = 1)
 #' lapply(ent, head, n = 4)
 EntropyMatrix <- function(me, featureTypes, nCores = 1, ...) {
   # Ensure featureTypes are valid
-  # if (!all(featureTypes %in% c("subsector", "subconcentric", 'subcombo', "supersector", "superconcentric", "supercombo"))) {
+  # if (!all(featureTypes %in% c("subsector", "subconcentric", 'subcombo',
+  # "supersector", "superconcentric", "supercombo"))) {
   #   stop("Invalid assayName(s) provided!")
   # }
 
   # Generate a list of entropy matrices
   entropy_matrices <- lapply(featureTypes, function(assay) {
-    counts_matrix <- CountsMatrix(me = me, assayName = assay, nCores = nCores, ...)
+    counts_matrix <- CountsMatrix(me = me,
+                                  assayName = assay,
+                                  nCores = nCores, ...)
     matrix_entropy(counts_matrix, nCores = nCores)
   })
 

diff --git a/R/EntropyMatrix_utils.R b/R/EntropyMatrix_utils.R
@@ -5,38 +5,42 @@
 #' @importFrom terra t diff
 #' @return A matrix of annuli polygon counts.
 annuli_counts = function(mat) {
-  fac = sub("_[0-9]+$", "", colnames(mat))
-  # tmat = terra::t(mat)
-  tmat = terra::t(mat)
-  tmat_split = split.data.frame(tmat, fac)
-  # tmat_split_diffs = lapply(tmat_split, terra::diff)
-  tmat_split_diffs = lapply(tmat_split, terra::diff)
-  diffs = do.call(rbind, tmat_split_diffs)
-  cts_new = terra::t(diffs)
+  fac <- sub("_[0-9]+$", "", colnames(mat))
+  tmat <- terra::t(mat)
+  tmat_split <- split.data.frame(tmat, fac)
+  tmat_split_diffs <- lapply(tmat_split, terra::diff)
+  diffs <- do.call(rbind, tmat_split_diffs)
+  cts_new <- terra::t(diffs)
   cts_new[cts_new < 0] <- 0
   return(cts_new)
 }
 
 #' Extract Counts Matrix from Molecule Experiment based on Assay Type
 #'
 #' @description
-#' This function retrieves a counts matrix from a Molecule Experiment object based on the given assay type.
+#' This function retrieves a counts matrix from a Molecule Experiment object
+#' based on the given assay type.
 #'
 #' @param me A Molecule Experiment object.
-#' @param assayName A character string indicating the assay type. Supported values include
-#' "sub-sector", "sub-concentric", "sub-combo", "super-concentric", and "super-combo".
+#' @param assayName A character string indicating the assay type. Supported
+#' values include "subsector", "subconcentric", "subcombo", "supersector",
+#' "superconcentric", and "supercombo".
 #' @param nCores Number of cores for parallel processing (default 1)
 #' @param ... arguments passing to MoleculeExperiment::countMolecules
 #'
 #' @return A counts matrix corresponding to the specified assay type.
 #' @importFrom MoleculeExperiment countMolecules
 #' @examples
-#' \dontrun{
 #' # Assuming `data_obj` is your Molecule Experiment object
-#' cm = CountsMatrix(data_obj, assayName = "sub-sector")
-#' }
+#' # cm = CountsMatrix(data_obj, assayName = "subsector")
 CountsMatrix <- function(me, assayName, nCores = 1, ...) {
-  counts_matrix = MoleculeExperiment::countMolecules(me, moleculesAssay = "detected", boundariesAssay = assayName, matrixOnly = TRUE, nCores = nCores, ...)
+  counts_matrix <- MoleculeExperiment::countMolecules(
+    me,
+    moleculesAssay = "detected",
+    boundariesAssay = assayName,
+    matrixOnly = TRUE,
+    nCores = nCores,
+    ...)
 
   if (assayName %in% c("subsector", "supersector")) {
     return(counts_matrix)

diff --git a/R/EntropySummarizedExperiment.R → R/EntropySingleCellExperiment.R b/R/EntropySummarizedExperiment.R → R/EntropySingleCellExperiment.R
@@ -1,45 +1,54 @@
-#' Create a SummarizedExperiment Object from a List of Data Frames
+#' Create a SingleCellExperiment Object from a List of Data Frames
 #'
-#' This function takes a list of data frames and a Molecule Experiment object and
-#' constructs a SummarizedExperiment object with the specified data structure.
+#' This function takes a list of data frames and a Molecule Experiment object
+#' and constructs a SingleCellExperiment object with the specified data
+#' structure.
 #'
 #' @param df_list A list of data frames, each containing assay data.
 #' @param me A Molecule Experiment object.
-#' @param includeCounts logical (default FALSE) whether to include gene counts as features
+#' @param includeCounts logical (default FALSE) whether to include gene
+#' counts as features
 #' @param concatenateFeatures logical whether to concatenate all the features
-#' into a single assay (default FALSE). If FALSE the output SE object has multiple assays
+#' into a single assay (default FALSE). If FALSE the output SCE object has
+#' multiple assays
 #' @param nCores Number of cores (default 1)
 #'
-#' @return A SummarizedExperiment object.
+#' @return A SingleCellExperiment object.
 #'
 #' @export
-#' @import SummarizedExperiment
+#' @import SingleCellExperiment
 #' @import SpatialFeatures
 #' @importFrom MoleculeExperiment countMolecules
 #' @examples
 #' data(example_me)
 #' me <- loadBoundaries(me)
-#' ent <- EntropyMatrix(me, c("subsector", "subconcentric", "supersector", "superconcentric"), nCores = 1)
-#' se <- EntropySummarizedExperiment(ent, me)
+#' ent <- EntropyMatrix(me, c("subsector", "subconcentric",
+#' "supersector", "superconcentric"), nCores = 1)
+#' se <- EntropySingleCellExperiment(ent, me)
 #' se
-EntropySummarizedExperiment <- function(df_list, me, includeCounts = FALSE, concatenateFeatures = FALSE, nCores = 1) {
+EntropySingleCellExperiment <- function(df_list,
+                                        me,
+                                        includeCounts = FALSE,
+                                        concatenateFeatures = FALSE,
+                                        nCores = 1) {
 
   # 1. Assay Data: Using countMolecules function to get assay data.
   # Creating the assay_data
-  assay_data <- make_assay_data(df_list, concatenateFeatures = concatenateFeatures)
+  assay_data <- make_assay_data(df_list,
+                                concatenateFeatures = concatenateFeatures)
 
   if (includeCounts) {
     # Generating the genecount
-    genecount = MoleculeExperiment::countMolecules(me,
-                                                   moleculesAssay = "detected",
-                                                   boundariesAssay = "cell",
-                                                   matrixOnly = TRUE,
-                                                   nCores = nCores)
+    genecount <- MoleculeExperiment::countMolecules(me,
+                                                    moleculesAssay = "detected",
+                                                    boundariesAssay = "cell",
+                                                    matrixOnly = TRUE,
+                                                    nCores = nCores)
 
     if (concatenateFeatures) {
 
       # Adding a prefix to the row names of genecount to differentiate it
-      rownames(genecount) <- paste("genecount", rownames(genecount), sep="_")
+      rownames(genecount) <- paste("genecount", rownames(genecount), sep = "_")
 
       genecount <- as.matrix(genecount)
 
@@ -61,47 +70,37 @@ EntropySummarizedExperiment <- function(df_list, me, includeCounts = FALSE, conc
       FeatureGene = gsub(".*_", "", rownames(assay_data))
     )
 
-    # Translate "sub" and "super" prefixes to "Subcellular" and "Supercellular" respectively
-    # rowData$FeatureCategory <- gsub("^sub", "Subcellular", rowData$FeatureCategory)
-    # rowData$FeatureCategory <- gsub("^super", "Supercellular", rowData$FeatureCategory)
-    # rowData$FeatureCategory <- gsub("^genecount", "Genecount", rowData$FeatureCategory)
-
   } else {
-    rowData = NULL
+    rowData <- NULL
   }
 
   # 3. Column Data
   cell_df <- extract_boundaries_and_centroids(me)[[2]]
-  # Renaming the columns for clarity and ease of use
-  colnames(cell_df) <- c("x_location", "y_location", "Cell", "Sample_id", "x_central", "y_central")
-  # Now, create numericList for each cell
+  colnames(cell_df) <- c("x_location", "y_location",
+                         "Cell", "Sample_id", "x_central", "y_central")
   cell_df_list <- split(cell_df[, c('x_location', 'y_location')], cell_df$Cell)
-  # Extract unique rows
   colData <- unique(cell_df[, c("Cell", "Sample_id", "x_central", "y_central")])
-  # Combine x_central, y_central, and numericList
   colData$boundaries <- I(cell_df_list)
-  # Convert Cell column to a format compatible with the columns in assay_data
   colData$Cell <- paste0(colData$Sample_id, ".", colData$Cell)
 
   if (concatenateFeatures) {
-    # Create the SummarizedExperiment object
-    se <- SummarizedExperiment::SummarizedExperiment(
+    se <- SingleCellExperiment::SingleCellExperiment(
       assays = list(spatialFeatures = as.matrix(assay_data)),
       rowData = rowData,
       colData = colData
     )
   } else {
 
-    rnames = gsub(".*_", "", rownames(assay_data[[1]]))
-    cnames = gsub(".*_", "", colnames(assay_data[[1]]))
+    rnames <- gsub(".*_", "", rownames(assay_data[[1]]))
+    cnames <- gsub(".*_", "", colnames(assay_data[[1]]))
 
     assay_data <- lapply(assay_data, function(x){
       rownames(x) <- rnames
       colnames(x) <- cnames
       return(x)
     })
 
-    se <- SummarizedExperiment::SummarizedExperiment(
+    se <- SingleCellExperiment::SingleCellExperiment(
       assays = assay_data,
       rowData = rowData,
       colData = colData

diff --git a/R/EntropySummarizedExperiment_utils.R → R/EntropySingleCellExperiment_utils.R b/R/EntropySummarizedExperiment_utils.R → R/EntropySingleCellExperiment_utils.R
@@ -1,14 +1,15 @@
 #' make assay data
 #'
 #' @param df_list A list of data frames
-#' @param concatenateFeatures logical whether to concatenate features (default FALSE)
-#' @return if concatenateFeatures == TRUE, A single data frame, otherwise a list
-#' containing data frames
+#' @param concatenateFeatures logical whether to concatenate
+#' features (default FALSE)
+#' @return if concatenateFeatures == TRUE, A single data frame,
+#' otherwise a list containing data frames
 make_assay_data = function(df_list, concatenateFeatures = FALSE) {
 
-  assay_data_list = lapply(names(df_list), function(assayName) {
+  assay_data_list <- lapply(names(df_list), function(assayName) {
     df <- df_list[[assayName]]
-    rownames(df) <- paste(assayName, rownames(df), sep="_")
+    rownames(df) <- paste(assayName, rownames(df), sep = "_")
     return(df)
   })
   names(assay_data_list) <- names(df_list)

diff --git a/R/loadBoundaries.R b/R/loadBoundaries.R
@@ -1,21 +1,22 @@
 #' Load Boundaries to Molecule Experiment Object
 #'
 #' @description
-#' This function takes a Molecule Experiment (ME) object as input and enriches it
-#' with boundary data from a variety of assays. The currently supported assays are:
+#' This function takes a Molecule Experiment (ME) object as input and enriches
+#' it with boundary data from a variety of assays. The currently supported
+#' assays are:
 #'
 #' - Sub-sector polygons
 #' - Sub-concentric polygons
 #' - Super-sector polygons
 #' - Super-concentric polygons
 #'
-#' The function internally processes each assay, extracts respective boundary information,
-#' and updates the original ME object with this new data.
+#' The function internally processes each assay, extracts respective boundary
+#' information, and updates the original ME object with this new data.
 #'
 #' @param me A MoleculeExperiment (ME) object
 #' @param ... arguments passed to GenerateFeatureData
-#' @return An enriched Molecule Experiment (ME) object that includes boundary information
-#'         for the supported assays.
+#' @return An enriched Molecule Experiment (ME) object that includes boundary
+#' information for the supported assays.
 #' @importFrom MoleculeExperiment dataframeToMEList
 #' @importFrom MoleculeExperiment boundaries
 #' @export
@@ -27,7 +28,8 @@ loadBoundaries <- function(me,...) {
 
   featureData = GenerateFeatureData(me, ...)
 
-  featureData <- lapply(featureData, function(x) {x$segment_id <- x$area_id; return(x)})
+  featureData <- lapply(featureData,
+                        function(x) {x$segment_id <- x$area_id; return(x)})
 
   bds_list = lapply(featureData, MoleculeExperiment::dataframeToMEList,
                     dfType = "boundaries", assayName = "cell",
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		# SpatialFeatures v0.0.5 (2024-09-19)
		+ Submitted to Bioconductor.