AlexsLemonade · sjspielman · Sep 11, 2024 · Sep 3, 2024 · Sep 3, 2024 · Sep 3, 2024
@@ -22,3 +22,7 @@ Suggests:
     testthat (>= 3.0.0)
 Config/testthat/edition: 3
 RoxygenNote: 7.3.2
+Imports: 
+    bluster,
+    dplyr,
+    methods
@@ -0,0 +1,108 @@
+#' Calculate graph-based clusters from a provided matrix
+#'
+#' This function is provided to simplify application of bluster package clustering functions on OpenScPCA data.
+#' In particular, this function runs bluster::clusterRows() with the bluster::NNGraphParam() function.
+#' Note that defaults for some arguments may differ from the bluster::NNGraphParam() defaults.
+#' Specifically, the clustering algorithm defaults to "louvain" and the weighting scheme to "jaccard"
+#' to align with common practice in scRNA-seq analysis.
+#'
+#' @param mat Matrix, usually of PCs, where each row is a cell. Must be a base matrix or an object
+#'   inheriting from class "Matrix" (e.g., a sparse matrix), and must have rownames of cell ids (e.g., barcodes)
+#' @param algorithm Clustering algorithm to use. Must be one of "louvain" (default), "walktrap", or "leiden".
+#' @param weighting Weighting scheme to use. Must be one of "jaccard" (default), "rank", or "number"
+#' @param nn Number of nearest neighbors. Default is 10.
+#' @param resolution Resolution parameter used by louvain and leiden clustering only. Default is 1.
+#'   For these algorithms, this value overrides any `resolution` element supplied in `cluster_args`.
+#' @param objective_function Leiden-specific parameter for whether to use the Constant Potts Model ("CPM"; default) or "modularity".
+#'   For leiden clustering, this value overrides any `objective_function` element supplied in `cluster_args`.
+#' @param cluster_args List of additional arguments to pass to the chosen clustering function.
+#'   Must be a named list, and only single values for each argument are supported (no vectors or lists).
+#'   See igraph documentation for details on each clustering function: https://igraph.org/r/html/latest
+#' @param seed Random seed to set for clustering. Default is `NULL`, in which case no seed is set
+#'   and the current state of the random number generator is used.
+#'
+#' @return A data frame of cluster results with columns `cell_id` and `cluster`. Additional columns represent algorithm parameters
+#'   and include at least: `algorithm`, `weighting`, and `nn`. Louvain and leiden clustering will also include `resolution`, and
+#'   leiden clustering will further include `objective_function`. Any arguments provided in `cluster_args` are
+#'   also included as columns.
+#'
+#' @export
+#'
+#' @examples
+#' \dontrun{
+#' # cluster using default parameters
+#' cluster_df <- calculate_clusters(pca_matrix)
+#'
+#' # cluster using the leiden algorithm with a resolution of 0.1
+#' cluster_df <- calculate_clusters(pca_matrix, algorithm = "leiden", resolution = 0.1)
+#'
+#' # cluster using the leiden algorithm with a non-default of 3 iterations
+#' cluster_df <- calculate_clusters(
+#'   pca_matrix,
+#'   algorithm = "leiden",
+#'   cluster_args = list(n_iterations = 3)
+#' )
+#' }
+calculate_clusters <- function(
+    mat,
+    algorithm = c("louvain", "walktrap", "leiden"),
+    weighting = c("jaccard", "rank", "number"),
+    nn = 10,
+    resolution = 1, # louvain or leiden
+    objective_function = c("CPM", "modularity"), # leiden only
+    cluster_args = list(),
+    seed = NULL) {
+  # Check input arguments first, so that an invalid call does not alter the RNG state
+  stopifnot(
+    # methods::is() follows S4 inheritance, so subclasses of "Matrix" are accepted;
+    # comparing against class() directly would only match the literal class name
+    "The `mat` argument must be a matrix." = is.matrix(mat) || methods::is(mat, "Matrix"),
+    "The `mat` matrix must have row names representing cell ids, e.g. barcodes." = is.character(rownames(mat)),
+    "`resolution` must be numeric" = is.numeric(resolution),
+    "`nn` must be numeric" = is.numeric(nn)
+  )
+
+  algorithm <- match.arg(algorithm)
+  weighting <- match.arg(weighting)
+  objective_function <- match.arg(objective_function)
+
+  if (length(cluster_args) > 0) {
+    stopifnot(
+      "`cluster_args` must be a named list." = is.list(cluster_args) && !("" %in% methods::allNames(cluster_args)),
+      "`cluster_args` elements must all have only a single value" = all(lengths(cluster_args) == 1)
+    )
+  }
+
+  if (!is.null(seed)) {
+    set.seed(seed)
+  }
+
+  # Update cluster_args list with parameters that users can directly provide
+  # note that clusterRows throws an error if this list has a param not used by the chosen algorithm
+  if (algorithm %in% c("louvain", "leiden")) {
+    cluster_args$resolution <- resolution
+  }
+  if (algorithm == "leiden") {
+    cluster_args$objective_function <- objective_function
+  }
+
+  # Perform clustering
+  clusters <- bluster::clusterRows(
+    mat,
+    bluster::NNGraphParam(
+      k = nn,
+      type = weighting,
+      cluster.fun = algorithm,
+      cluster.args = cluster_args
+    )
+  )
+
+  # Transform results into a table
+  cluster_df <- data.frame(
+    cell_id = rownames(mat),
+    cluster = clusters,
+    algorithm = algorithm,
+    weighting = weighting,
+    nn = nn
+  )
+
+  # Only append parameter columns when there are some to add (e.g. walktrap with
+  # no extra arguments has none); this avoids relying on how bind_cols() handles
+  # a data frame with zero rows and zero columns
+  if (length(cluster_args) > 0) {
+    cluster_df <- dplyr::bind_cols(cluster_df, data.frame(cluster_args))
+  }
+
+  return(cluster_df)
+}
@@ -0,0 +1,12 @@
+# This file is part of the standard setup for testthat.
+# It is recommended that you do not modify it.
+#
+# Where should you do additional test configuration?
+# Learn more about the roles of various files in:
+# * https://r-pkgs.org/testing-design.html#sec-tests-files-overview
+# * https://testthat.r-lib.org/articles/special-files.html
+
+# Attach testthat and the package under test.
+library(testthat)
+library(rOpenScPCA)
+
+# Run the testthat suite for the rOpenScPCA package.
+test_check("rOpenScPCA")
@@ -0,0 +1,78 @@
+# Shared fixture: a 100 x 10 matrix of uniform random values with character row names.
+# The seed is fixed so that the fixture, and therefore any test failure, is reproducible.
+set.seed(2024)
+test_mat <- matrix(
+  runif(1000, -3, 3),
+  nrow = 100,
+  ncol = 10
+)
+rownames(test_mat) <- as.character(1:100)
+
+test_that("calculate_clusters runs with defaults", {
+  cluster_df <- calculate_clusters(test_mat)
+
+  # Columns must appear in exactly this order
+  expect_named(
+    cluster_df,
+    c("cell_id", "cluster", "algorithm", "weighting", "nn", "resolution")
+  )
+
+  # Cell ids are carried over from the matrix row names
+  expect_equal(cluster_df$cell_id, as.character(1:100))
+
+  expect_s3_class(cluster_df$cluster, "factor")
+
+  # Each parameter column holds a single value, equal to the documented default
+  expected_defaults <- list(
+    algorithm = "louvain",
+    weighting = "jaccard",
+    nn = 10,
+    resolution = 1
+  )
+  for (param in names(expected_defaults)) {
+    expect_equal(
+      unique(cluster_df[[param]]),
+      expected_defaults[[param]]
+    )
+  }
+})
+
+
+test_that("calculate_clusters runs with additional cluster_args", {
+  # Leiden adds `resolution` and `objective_function`; the extra argument
+  # passed through `cluster_args` should be reported as its own column
+  expected_columns <- c(
+    "cell_id", "cluster", "algorithm", "weighting", "nn",
+    "resolution", "objective_function", "n_iterations"
+  )
+
+  leiden_df <- calculate_clusters(
+    test_mat,
+    algorithm = "leiden",
+    cluster_args = list(n_iterations = 3)
+  )
+
+  expect_setequal(names(leiden_df), expected_columns)
+  expect_equal(unique(leiden_df$n_iterations), 3)
+})
+
+
+
+
+test_that("calculate_clusters errors as expected", {
+  test_mat_nonames <- test_mat
+  rownames(test_mat_nonames) <- NULL
+
+  # Each expectation matches the specific validation message, so that an
+  # unrelated failure cannot make these tests pass by accident
+  expect_error(
+    calculate_clusters(test_mat_nonames),
+    "must have row names",
+    fixed = TRUE
+  )
+  expect_error(
+    calculate_clusters("not a matrix"),
+    "must be a matrix",
+    fixed = TRUE
+  )
+  expect_error(
+    calculate_clusters(test_mat, resolution = "string"),
+    "`resolution` must be numeric",
+    fixed = TRUE
+  )
+  expect_error(
+    calculate_clusters(test_mat, nn = "string"),
+    "`nn` must be numeric",
+    fixed = TRUE
+  )
+  expect_error(
+    calculate_clusters(
+      test_mat,
+      cluster_args = list(too_long = 1:10)
+    ),
+    "must all have only a single value",
+    fixed = TRUE
+  )
+  # Unnamed elements in `cluster_args` are rejected
+  expect_error(
+    calculate_clusters(
+      test_mat,
+      cluster_args = list(3)
+    ),
+    "must be a named list",
+    fixed = TRUE
+  )
+})