From 5fba01770cc5d42653eb300bbec9f3a0a03420b6 Mon Sep 17 00:00:00 2001
From: "Stephanie J. Spielman" <stephanie.spielman@gmail.com>
Date: Tue, 3 Sep 2024 14:23:29 -0400
Subject: [PATCH 01/41] initial clustering function

---
 packages/rOpenScPCA/R/calculate-clusters.R | 68 ++++++++++++++++++++++
 1 file changed, 68 insertions(+)
 create mode 100644 packages/rOpenScPCA/R/calculate-clusters.R

diff --git a/packages/rOpenScPCA/R/calculate-clusters.R b/packages/rOpenScPCA/R/calculate-clusters.R
new file mode 100644
index 000000000..febd489fa
--- /dev/null
+++ b/packages/rOpenScPCA/R/calculate-clusters.R
@@ -0,0 +1,68 @@
+calculate_clusters <- function(
+    mat,
+    algorithm = "louvain",
+    weighting = "jaccard",
+    nn = 10,
+    resolution = 1, # leiden and louvain
+    objective_function = "CPM", # leiden only
+    random_seed = 2024,
+    cluster_args = list()) {
+  set.seed(random_seed)
+
+  # Check input arguments
+  stopifnot(
+    "The `mat` argument must be a matrix." = class(mat) %in% c("Matrix", "dgCMatrix"),
+    "The `mat` matrix must be numeric." = type(mat) %in% c("numeric", "double"),
+    "The `mat` matrix must have row names representing cell ids (e.g. barcodes)." = !(is.null(rownames(mat)))
+  )
+
+  algorithm <- toLower(algorithm)
+  stopifnot(
+    "`algorithm` must be one of 'louvain' (default), 'walktrap' or 'leiden'." =
+      algorithm %in% c("louvain", "walktrap", "leiden")
+  )
+
+  weighting <- toLower(weighting)
+  stopifnot(
+    "`weighting` must be one of 'jaccard' (default) or 'rank'." =
+      weighting %in% c("jaccard", "rank", "leiden")
+  )
+
+  # TODO: consider adding more specific checks here?
+  stopifnot(
+    "`cluster_args` must be a list." = class(cluster_args) == "list"
+  )
+
+  # Update cluster_args list with settings that users can directly provide
+  if (algorithm != "walktrap") {
+    cluster_args$resolution <- resolution
+  }
+  if (algorithm == "leiden") {
+    cluster_args$resolution <- objective_function
+  }
+
+
+  # Perform clustering
+  clusters <- bluster::clusterRows(
+    mat,
+    bluster::NNGraphParam(
+      k = nn,
+      type = weighting,
+      cluster.fun = algorithm,
+      cluster.args = cluster_args
+    )
+  )
+
+
+  # Transform results into a table and return
+  # TODO: Should this have _all_ parameters in cluster.args?
+  return(
+    tibble::tibble(
+      barcode = rownames(mat),
+      cluster = clusters,
+      algorithm = algorithm,
+      weighting = weighting,
+      nn = nn
+    )
+  )
+}

From 116c73d54924b1da2d0373bb0c2bbc54f2844a06 Mon Sep 17 00:00:00 2001
From: "Stephanie J. Spielman" <stephanie.spielman@gmail.com>
Date: Tue, 3 Sep 2024 14:30:58 -0400
Subject: [PATCH 02/41] add bluster and roxygen. Update column name to cell_id

---
 packages/rOpenScPCA/DESCRIPTION               |  2 +
 packages/rOpenScPCA/R/calculate-clusters.R    | 20 ++++++++-
 packages/rOpenScPCA/man/calculate_clusters.Rd | 45 +++++++++++++++++++
 3 files changed, 66 insertions(+), 1 deletion(-)
 create mode 100644 packages/rOpenScPCA/man/calculate_clusters.Rd

diff --git a/packages/rOpenScPCA/DESCRIPTION b/packages/rOpenScPCA/DESCRIPTION
index fa2c4f557..839d757e5 100644
--- a/packages/rOpenScPCA/DESCRIPTION
+++ b/packages/rOpenScPCA/DESCRIPTION
@@ -22,3 +22,5 @@ Suggests:
     testthat (>= 3.0.0)
 Config/testthat/edition: 3
 RoxygenNote: 7.3.2
+Imports: 
+    bluster
diff --git a/packages/rOpenScPCA/R/calculate-clusters.R b/packages/rOpenScPCA/R/calculate-clusters.R
index febd489fa..3b8a9a184 100644
--- a/packages/rOpenScPCA/R/calculate-clusters.R
+++ b/packages/rOpenScPCA/R/calculate-clusters.R
@@ -1,3 +1,21 @@
+#' Calculate graph-based clusters from a provided matrix
+#'
+#' @param mat Matrix, usually of PCs, where each row is a cell. Matrix must have rownames of cell ids (e.g., barcodes)
+#' @param algorithm Clustering algorithm to use. Must be one of "louvain" (default), "walktrap", or "leiden"
+#' @param weighting Weighting scheme to use. Must be one of "jaccard" (default) or "rank"
+#' @param nn Number of nearest neighbors. Default is 10.
+#' @param resolution Resolution parameter used by louvain and leiden clustering only. Default is 1.
+#' @param objective_function Leiden-specific parameter for whether to use the Constant Potts Model ("CPM"; default) or "modularity"
+#' @param random_seed Random seed to set for clustering. Default is 2024.
+#' @param cluster_args List of additional arguments to pass to the clustering function.
+#'
+#' @return A data frame of cluster results with columns `cell_id` and `cluster`. Additional columns represent algorithm parameters.
+#' @export
+#'
+#' @examples
+#' \dontrun{
+#' # TODO WILL ADD EXAMPLES ONCE ARGS ARE FINALIZED
+#' }
 calculate_clusters <- function(
     mat,
     algorithm = "louvain",
@@ -58,7 +76,7 @@ calculate_clusters <- function(
   # TODO: Should this have _all_ parameters in cluster.args?
   return(
     tibble::tibble(
-      barcode = rownames(mat),
+      cell_id = rownames(mat),
       cluster = clusters,
       algorithm = algorithm,
       weighting = weighting,
diff --git a/packages/rOpenScPCA/man/calculate_clusters.Rd b/packages/rOpenScPCA/man/calculate_clusters.Rd
new file mode 100644
index 000000000..38cb178b8
--- /dev/null
+++ b/packages/rOpenScPCA/man/calculate_clusters.Rd
@@ -0,0 +1,45 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/calculate-clusters.R
+\name{calculate_clusters}
+\alias{calculate_clusters}
+\title{Calculate graph-based clusters from a provided matrix}
+\usage{
+calculate_clusters(
+  mat,
+  algorithm = "louvain",
+  weighting = "jaccard",
+  nn = 10,
+  resolution = 1,
+  objective_function = "CPM",
+  random_seed = 2024,
+  cluster_args = list()
+)
+}
+\arguments{
+\item{mat}{Matrix, usually of PCs, where each row is a cell. Matrix must have rownames of cell ids (e.g., barcodes)}
+
+\item{algorithm}{Clustering algorithm to use. Must be one of "louvain" (default), "walktrap", or "leiden"}
+
+\item{weighting}{Weighting scheme to use. Must be one of "jaccard" (default) or "rank"}
+
+\item{nn}{Number of nearest neighbors. Default is 10.}
+
+\item{resolution}{Resolution parameter used by louvain and leiden clustering only. Default is 1.}
+
+\item{objective_function}{Leiden-specific parameter for whether to use the Constant Potts Model ("CPM"; default) or "modularity"}
+
+\item{random_seed}{Random seed to set for clustering. Default is 2024.}
+
+\item{cluster_args}{List of additional arguments to pass to the clustering function.}
+}
+\value{
+A data frame of cluster results with columns `cell_id` and `cluster`. Additional columns represent algorithm parameters.
+}
+\description{
+Calculate graph-based clusters from a provided matrix
+}
+\examples{
+\dontrun{
+  # TODO WILL ADD EXAMPLES ONCE ARGS ARE FINALIZED
+}
+}

From 3ad4f4961a4298d71cae688673c15f3f6986bd25 Mon Sep 17 00:00:00 2001
From: "Stephanie J. Spielman" <stephanie.spielman@gmail.com>
Date: Tue, 3 Sep 2024 14:49:22 -0400
Subject: [PATCH 03/41] implement test of defaults and debug

---
 packages/rOpenScPCA/R/calculate-clusters.R    |  7 ++--
 .../tests/testthat/test-calculate-clusters.R  | 35 +++++++++++++++++++
 2 files changed, 38 insertions(+), 4 deletions(-)
 create mode 100644 packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R

diff --git a/packages/rOpenScPCA/R/calculate-clusters.R b/packages/rOpenScPCA/R/calculate-clusters.R
index 3b8a9a184..b4203deda 100644
--- a/packages/rOpenScPCA/R/calculate-clusters.R
+++ b/packages/rOpenScPCA/R/calculate-clusters.R
@@ -29,18 +29,17 @@ calculate_clusters <- function(
 
   # Check input arguments
   stopifnot(
-    "The `mat` argument must be a matrix." = class(mat) %in% c("Matrix", "dgCMatrix"),
-    "The `mat` matrix must be numeric." = type(mat) %in% c("numeric", "double"),
+    "The `mat` argument must be a matrix." = any(class(mat) %in% c("matrix", "dgCMatrix")),
     "The `mat` matrix must have row names representing cell ids (e.g. barcodes)." = !(is.null(rownames(mat)))
   )
 
-  algorithm <- toLower(algorithm)
+  algorithm <- tolower(algorithm)
   stopifnot(
     "`algorithm` must be one of 'louvain' (default), 'walktrap' or 'leiden'." =
       algorithm %in% c("louvain", "walktrap", "leiden")
   )
 
-  weighting <- toLower(weighting)
+  weighting <- tolower(weighting)
   stopifnot(
     "`weighting` must be one of 'jaccard' (default) or 'rank'." =
       weighting %in% c("jaccard", "rank", "leiden")
diff --git a/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R b/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R
new file mode 100644
index 000000000..13a587d31
--- /dev/null
+++ b/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R
@@ -0,0 +1,35 @@
+test_that("calculate_clusters runs with defaults", {
+  test_mat <- matrix(
+    runif(1000, 0.1, 10),
+    nrow = 100,
+    ncol = 10
+  )
+
+  rownames(test_mat) <- 1:100
+  cluster_df <- calculate_clusters(test_mat)
+
+
+  expect_equal(
+    names(cluster_df),
+    c("cell_id", "cluster", "algorithm", "weighting", "nn")
+  )
+  expect_equal(
+    cluster_df$cell_id,
+    as.character(1:100)
+  )
+  expect_true(
+    is.factor(cluster_df$cluster)
+  )
+  expect_equal(
+    unique(cluster_df$algorithm),
+    "louvain"
+  )
+  expect_equal(
+    unique(cluster_df$weighting),
+    "jaccard"
+  )
+  expect_equal(
+    unique(cluster_df$nn),
+    10
+  )
+})

From 5eff75831b07099b6027bdc84c5df51189f8d85b Mon Sep 17 00:00:00 2001
From: "Stephanie J. Spielman" <stephanie.spielman@gmail.com>
Date: Wed, 4 Sep 2024 08:37:55 -0400
Subject: [PATCH 04/41] add testthat file

---
 packages/rOpenScPCA/tests/testthat.R | 12 ++++++++++++
 1 file changed, 12 insertions(+)
 create mode 100644 packages/rOpenScPCA/tests/testthat.R

diff --git a/packages/rOpenScPCA/tests/testthat.R b/packages/rOpenScPCA/tests/testthat.R
new file mode 100644
index 000000000..5dc4da8e3
--- /dev/null
+++ b/packages/rOpenScPCA/tests/testthat.R
@@ -0,0 +1,12 @@
+# This file is part of the standard setup for testthat.
+# It is recommended that you do not modify it.
+#
+# Where should you do additional test configuration?
+# Learn more about the roles of various files in:
+# * https://r-pkgs.org/testing-design.html#sec-tests-files-overview
+# * https://testthat.r-lib.org/articles/special-files.html
+
+library(testthat)
+library(rOpenScPCA)
+
+test_check("rOpenScPCA")

From 381282682579a55ae64782226b44f340c06b6611 Mon Sep 17 00:00:00 2001
From: "Stephanie J. Spielman" <stephanie.spielman@gmail.com>
Date: Wed, 4 Sep 2024 08:39:00 -0400
Subject: [PATCH 05/41] add tibble

---
 packages/rOpenScPCA/DESCRIPTION               | 3 ++-
 packages/rOpenScPCA/man/calculate_clusters.Rd | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/packages/rOpenScPCA/DESCRIPTION b/packages/rOpenScPCA/DESCRIPTION
index 839d757e5..0893561ed 100644
--- a/packages/rOpenScPCA/DESCRIPTION
+++ b/packages/rOpenScPCA/DESCRIPTION
@@ -23,4 +23,5 @@ Suggests:
 Config/testthat/edition: 3
 RoxygenNote: 7.3.2
 Imports: 
-    bluster
+    bluster,
+    tibble
diff --git a/packages/rOpenScPCA/man/calculate_clusters.Rd b/packages/rOpenScPCA/man/calculate_clusters.Rd
index 38cb178b8..d76097fff 100644
--- a/packages/rOpenScPCA/man/calculate_clusters.Rd
+++ b/packages/rOpenScPCA/man/calculate_clusters.Rd
@@ -40,6 +40,6 @@ Calculate graph-based clusters from a provided matrix
 }
 \examples{
 \dontrun{
-  # TODO WILL ADD EXAMPLES ONCE ARGS ARE FINALIZED
+# TODO WILL ADD EXAMPLES ONCE ARGS ARE FINALIZED
 }
 }

From 0305faaae709087163e23450ee2fe17392d2be29 Mon Sep 17 00:00:00 2001
From: "Stephanie J. Spielman" <stephanie.spielman@gmail.com>
Date: Wed, 4 Sep 2024 08:47:51 -0400
Subject: [PATCH 06/41] some comments for future us

---
 packages/rOpenScPCA/R/calculate-clusters.R | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/packages/rOpenScPCA/R/calculate-clusters.R b/packages/rOpenScPCA/R/calculate-clusters.R
index b4203deda..caea7c899 100644
--- a/packages/rOpenScPCA/R/calculate-clusters.R
+++ b/packages/rOpenScPCA/R/calculate-clusters.R
@@ -9,7 +9,7 @@
 #' @param random_seed Random seed to set for clustering. Default is 2024.
 #' @param cluster_args List of additional arguments to pass to the clustering function.
 #'
-#' @return A data frame of cluster results with columns `cell_id` and `cluster`. Additional columns represent algorithm parameters.
+#' @return A data frame of cluster results with columns `cell_id` and `cluster`. Additional columns (TODO!) represent algorithm parameters.
 #' @export
 #'
 #' @examples
@@ -51,6 +51,7 @@ calculate_clusters <- function(
   )
 
   # Update cluster_args list with settings that users can directly provide
+  # clusterRows throws an error if this list has a param not used by the chosen algorithm
   if (algorithm != "walktrap") {
     cluster_args$resolution <- resolution
   }
@@ -72,7 +73,7 @@ calculate_clusters <- function(
 
 
   # Transform results into a table and return
-  # TODO: Should this have _all_ parameters in cluster.args?
+  # TODO: Should this have _all_ (non-default/user-specified) parameters in cluster.args?
   return(
     tibble::tibble(
       cell_id = rownames(mat),

From f6b736d012669327d9070bdd92d5acaa79651322 Mon Sep 17 00:00:00 2001
From: Stephanie Spielman <stephanie.spielman@gmail.com>
Date: Mon, 9 Sep 2024 13:01:00 -0400
Subject: [PATCH 07/41] Apply suggestions from code review

Co-authored-by: Joshua Shapiro <josh.shapiro@ccdatalab.org>
---
 packages/rOpenScPCA/R/calculate-clusters.R | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/packages/rOpenScPCA/R/calculate-clusters.R b/packages/rOpenScPCA/R/calculate-clusters.R
index caea7c899..f92fe6759 100644
--- a/packages/rOpenScPCA/R/calculate-clusters.R
+++ b/packages/rOpenScPCA/R/calculate-clusters.R
@@ -23,13 +23,16 @@ calculate_clusters <- function(
     nn = 10,
     resolution = 1, # leiden and louvain
     objective_function = "CPM", # leiden only
-    random_seed = 2024,
-    cluster_args = list()) {
-  set.seed(random_seed)
+    cluster_args = list(),
+    seed = NULL    
+) {
+  if (!is.null(seed)) {
+    set.seed(seed)
+  }
 
   # Check input arguments
   stopifnot(
-    "The `mat` argument must be a matrix." = any(class(mat) %in% c("matrix", "dgCMatrix")),
+    "The `mat` argument must be a matrix." = any(class(mat) %in% c("matrix", "Matrix")),
     "The `mat` matrix must have row names representing cell ids (e.g. barcodes)." = !(is.null(rownames(mat)))
   )
 
@@ -47,16 +50,17 @@ calculate_clusters <- function(
 
   # TODO: consider adding more specific checks here?
   stopifnot(
-    "`cluster_args` must be a list." = class(cluster_args) == "list"
+    "`cluster_args` must be a list." = is.list(cluster_args)"
   )
 
   # Update cluster_args list with settings that users can directly provide
   # clusterRows throws an error if this list has a param not used by the chosen algorithm
-  if (algorithm != "walktrap") {
+  if (algorithm == "louvain") {
     cluster_args$resolution <- resolution
   }
   if (algorithm == "leiden") {
-    cluster_args$resolution <- objective_function
+    cluster_args$resolution_parameter <- resolution
+    cluster_args$objective_function <- objective_function
   }
 
 

From 5b4832189f2f0427e37e97f9b19b09ccc12f1d69 Mon Sep 17 00:00:00 2001
From: "Stephanie J. Spielman" <stephanie.spielman@gmail.com>
Date: Mon, 9 Sep 2024 13:02:17 -0400
Subject: [PATCH 08/41] rm .gitkeep

---
 packages/rOpenScPCA/R/.gitkeep | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 packages/rOpenScPCA/R/.gitkeep

diff --git a/packages/rOpenScPCA/R/.gitkeep b/packages/rOpenScPCA/R/.gitkeep
deleted file mode 100644
index e69de29bb..000000000

From 29c7868d197b2bca521f19ab0267e9a1c80c4e61 Mon Sep 17 00:00:00 2001
From: "Stephanie J. Spielman" <stephanie.spielman@gmail.com>
Date: Mon, 9 Sep 2024 13:03:04 -0400
Subject: [PATCH 09/41] fix quote

---
 packages/rOpenScPCA/R/calculate-clusters.R | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/packages/rOpenScPCA/R/calculate-clusters.R b/packages/rOpenScPCA/R/calculate-clusters.R
index f92fe6759..05565abb9 100644
--- a/packages/rOpenScPCA/R/calculate-clusters.R
+++ b/packages/rOpenScPCA/R/calculate-clusters.R
@@ -24,8 +24,7 @@ calculate_clusters <- function(
     resolution = 1, # leiden and louvain
     objective_function = "CPM", # leiden only
     cluster_args = list(),
-    seed = NULL    
-) {
+    seed = NULL) {
   if (!is.null(seed)) {
     set.seed(seed)
   }
@@ -50,7 +49,7 @@ calculate_clusters <- function(
 
   # TODO: consider adding more specific checks here?
   stopifnot(
-    "`cluster_args` must be a list." = is.list(cluster_args)"
+    "`cluster_args` must be a list." = is.list(cluster_args)
   )
 
   # Update cluster_args list with settings that users can directly provide

From 15fea48ec7c9f28da63ed3a058cff8a8bc4ee915 Mon Sep 17 00:00:00 2001
From: "Stephanie J. Spielman" <stephanie.spielman@gmail.com>
Date: Mon, 9 Sep 2024 13:05:06 -0400
Subject: [PATCH 10/41] reverting since resolution_parameter has been
 superceded

---
 packages/rOpenScPCA/R/calculate-clusters.R | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/packages/rOpenScPCA/R/calculate-clusters.R b/packages/rOpenScPCA/R/calculate-clusters.R
index 05565abb9..a8f7ffc7c 100644
--- a/packages/rOpenScPCA/R/calculate-clusters.R
+++ b/packages/rOpenScPCA/R/calculate-clusters.R
@@ -21,7 +21,7 @@ calculate_clusters <- function(
     algorithm = "louvain",
     weighting = "jaccard",
     nn = 10,
-    resolution = 1, # leiden and louvain
+    resolution = 1, # louvain or leiden
     objective_function = "CPM", # leiden only
     cluster_args = list(),
     seed = NULL) {
@@ -54,11 +54,10 @@ calculate_clusters <- function(
 
   # Update cluster_args list with settings that users can directly provide
   # clusterRows throws an error if this list has a param not used by the chosen algorithm
-  if (algorithm == "louvain") {
+  if (algorithm != "walktrap") {
     cluster_args$resolution <- resolution
   }
   if (algorithm == "leiden") {
-    cluster_args$resolution_parameter <- resolution
     cluster_args$objective_function <- objective_function
   }
 

From b168a01fe4c0c059c501ba596cb17dfb7d7182b7 Mon Sep 17 00:00:00 2001
From: "Stephanie J. Spielman" <stephanie.spielman@gmail.com>
Date: Mon, 9 Sep 2024 13:07:31 -0400
Subject: [PATCH 11/41] use match.args for algorithm and weighting

---
 packages/rOpenScPCA/R/calculate-clusters.R | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/packages/rOpenScPCA/R/calculate-clusters.R b/packages/rOpenScPCA/R/calculate-clusters.R
index a8f7ffc7c..5daece8f9 100644
--- a/packages/rOpenScPCA/R/calculate-clusters.R
+++ b/packages/rOpenScPCA/R/calculate-clusters.R
@@ -18,8 +18,8 @@
 #' }
 calculate_clusters <- function(
     mat,
-    algorithm = "louvain",
-    weighting = "jaccard",
+    algorithm = c("louvain", "walktrap", "leiden"),
+    weighting = c("jaccard", "rank"),
     nn = 10,
     resolution = 1, # louvain or leiden
     objective_function = "CPM", # leiden only
@@ -35,16 +35,17 @@ calculate_clusters <- function(
     "The `mat` matrix must have row names representing cell ids (e.g. barcodes)." = !(is.null(rownames(mat)))
   )
 
-  algorithm <- tolower(algorithm)
+  algorithm <- match.arg(algorithm)
+  print(algorithm)
   stopifnot(
     "`algorithm` must be one of 'louvain' (default), 'walktrap' or 'leiden'." =
       algorithm %in% c("louvain", "walktrap", "leiden")
   )
 
-  weighting <- tolower(weighting)
+  weighting <- match.arg(weighting)
   stopifnot(
     "`weighting` must be one of 'jaccard' (default) or 'rank'." =
-      weighting %in% c("jaccard", "rank", "leiden")
+      weighting %in% c("jaccard", "rank")
   )
 
   # TODO: consider adding more specific checks here?

From 0a433b8f808bc4dc1754e55dfc795cd525309570 Mon Sep 17 00:00:00 2001
From: "Stephanie J. Spielman" <stephanie.spielman@gmail.com>
Date: Mon, 9 Sep 2024 13:11:21 -0400
Subject: [PATCH 12/41] use data.frame and update docs

---
 packages/rOpenScPCA/R/calculate-clusters.R    |  4 ++--
 packages/rOpenScPCA/man/calculate_clusters.Rd | 14 +++++++-------
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/packages/rOpenScPCA/R/calculate-clusters.R b/packages/rOpenScPCA/R/calculate-clusters.R
index 5daece8f9..788568fc4 100644
--- a/packages/rOpenScPCA/R/calculate-clusters.R
+++ b/packages/rOpenScPCA/R/calculate-clusters.R
@@ -6,7 +6,7 @@
 #' @param nn Number of nearest neighbors. Default is 10.
 #' @param resolution Resolution parameter used by louvain and leiden clustering only. Default is 1.
 #' @param objective_function Leiden-specific parameter for whether to use the Constant Potts Model ("CPM"; default) or "modularity"
-#' @param random_seed Random seed to set for clustering. Default is 2024.
+#' @param seed Random seed to set for clustering. Default is 2024.
 #' @param cluster_args List of additional arguments to pass to the clustering function.
 #'
 #' @return A data frame of cluster results with columns `cell_id` and `cluster`. Additional columns (TODO!) represent algorithm parameters.
@@ -78,7 +78,7 @@ calculate_clusters <- function(
   # Transform results into a table and return
   # TODO: Should this have _all_ (non-default/user-specified) parameters in cluster.args?
   return(
-    tibble::tibble(
+    data.frame(
       cell_id = rownames(mat),
       cluster = clusters,
       algorithm = algorithm,
diff --git a/packages/rOpenScPCA/man/calculate_clusters.Rd b/packages/rOpenScPCA/man/calculate_clusters.Rd
index d76097fff..dbe53edaf 100644
--- a/packages/rOpenScPCA/man/calculate_clusters.Rd
+++ b/packages/rOpenScPCA/man/calculate_clusters.Rd
@@ -6,13 +6,13 @@
 \usage{
 calculate_clusters(
   mat,
-  algorithm = "louvain",
-  weighting = "jaccard",
+  algorithm = c("louvain", "walktrap", "leiden"),
+  weighting = c("jaccard", "rank"),
   nn = 10,
   resolution = 1,
   objective_function = "CPM",
-  random_seed = 2024,
-  cluster_args = list()
+  cluster_args = list(),
+  seed = NULL
 )
 }
 \arguments{
@@ -28,12 +28,12 @@ calculate_clusters(
 
 \item{objective_function}{Leiden-specific parameter for whether to use the Constant Potts Model ("CPM"; default) or "modularity"}
 
-\item{random_seed}{Random seed to set for clustering. Default is 2024.}
-
 \item{cluster_args}{List of additional arguments to pass to the clustering function.}
+
+\item{seed}{Random seed to set for clustering. Default is 2024.}
 }
 \value{
-A data frame of cluster results with columns `cell_id` and `cluster`. Additional columns represent algorithm parameters.
+A data frame of cluster results with columns `cell_id` and `cluster`. Additional columns (TODO!) represent algorithm parameters.
 }
 \description{
 Calculate graph-based clusters from a provided matrix

From 288842ace2982a1d96bf68a98f808ce50420dc99 Mon Sep 17 00:00:00 2001
From: "Stephanie J. Spielman" <stephanie.spielman@gmail.com>
Date: Mon, 9 Sep 2024 13:12:12 -0400
Subject: [PATCH 13/41] rm tibble dependency

---
 packages/rOpenScPCA/DESCRIPTION | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/packages/rOpenScPCA/DESCRIPTION b/packages/rOpenScPCA/DESCRIPTION
index 0893561ed..839d757e5 100644
--- a/packages/rOpenScPCA/DESCRIPTION
+++ b/packages/rOpenScPCA/DESCRIPTION
@@ -23,5 +23,4 @@ Suggests:
 Config/testthat/edition: 3
 RoxygenNote: 7.3.2
 Imports: 
-    bluster,
-    tibble
+    bluster

From ed93f8593980dde20646d86f4219cdb284e3edae Mon Sep 17 00:00:00 2001
From: "Stephanie J. Spielman" <stephanie.spielman@gmail.com>
Date: Mon, 9 Sep 2024 16:07:40 -0400
Subject: [PATCH 14/41] return data frame with all parameters and update docs.
 update cluster_args check

---
 packages/rOpenScPCA/R/calculate-clusters.R    | 38 ++++++++++---------
 packages/rOpenScPCA/man/calculate_clusters.Rd |  4 +-
 2 files changed, 24 insertions(+), 18 deletions(-)

diff --git a/packages/rOpenScPCA/R/calculate-clusters.R b/packages/rOpenScPCA/R/calculate-clusters.R
index 788568fc4..42ddfd4ea 100644
--- a/packages/rOpenScPCA/R/calculate-clusters.R
+++ b/packages/rOpenScPCA/R/calculate-clusters.R
@@ -9,7 +9,9 @@
 #' @param seed Random seed to set for clustering. Default is 2024.
 #' @param cluster_args List of additional arguments to pass to the clustering function.
 #'
-#' @return A data frame of cluster results with columns `cell_id` and `cluster`. Additional columns (TODO!) represent algorithm parameters.
+#' @return A data frame of cluster results with columns `cell_id` and `cluster`. Additional columns represent algorithm parameters
+#'   and include at least: `algorithm`, `weighting`, and `nn`. Louvain and leiden clustering will also include `resolution`, and
+#'   leiden clustering will further include `objective_function`. Any other values passed to `cluster_args` will also be included.
 #' @export
 #'
 #' @examples
@@ -36,7 +38,6 @@ calculate_clusters <- function(
   )
 
   algorithm <- match.arg(algorithm)
-  print(algorithm)
   stopifnot(
     "`algorithm` must be one of 'louvain' (default), 'walktrap' or 'leiden'." =
       algorithm %in% c("louvain", "walktrap", "leiden")
@@ -48,13 +49,14 @@ calculate_clusters <- function(
       weighting %in% c("jaccard", "rank")
   )
 
-  # TODO: consider adding more specific checks here?
-  stopifnot(
-    "`cluster_args` must be a list." = is.list(cluster_args)
-  )
+  if (length(cluster_args) != 0) {
+    stopifnot(
+      "`cluster_args` must be a named list." = is.list(cluster_args) && !(is.null(names(cluster_args)))
+    )
+  }
 
-  # Update cluster_args list with settings that users can directly provide
-  # clusterRows throws an error if this list has a param not used by the chosen algorithm
+  # Update cluster_args list with parameters that users can directly provide
+  # note that clusterRows throws an error if this list has a param not used by the chosen algorithm
   if (algorithm != "walktrap") {
     cluster_args$resolution <- resolution
   }
@@ -76,14 +78,16 @@ calculate_clusters <- function(
 
 
   # Transform results into a table and return
-  # TODO: Should this have _all_ (non-default/user-specified) parameters in cluster.args?
-  return(
-    data.frame(
-      cell_id = rownames(mat),
-      cluster = clusters,
-      algorithm = algorithm,
-      weighting = weighting,
-      nn = nn
+  cluster_df <- data.frame(
+    cell_id = rownames(mat),
+    cluster = clusters,
+    algorithm = algorithm,
+    weighting = weighting,
+    nn = nn
+  ) |>
+    dplyr::bind_cols(
+      dplyr::bind_rows(cluster_args)
     )
-  )
+
+  return(cluster_df)
 }
diff --git a/packages/rOpenScPCA/man/calculate_clusters.Rd b/packages/rOpenScPCA/man/calculate_clusters.Rd
index dbe53edaf..4f6de42ba 100644
--- a/packages/rOpenScPCA/man/calculate_clusters.Rd
+++ b/packages/rOpenScPCA/man/calculate_clusters.Rd
@@ -33,7 +33,9 @@ calculate_clusters(
 \item{seed}{Random seed to set for clustering. Default is 2024.}
 }
 \value{
-A data frame of cluster results with columns `cell_id` and `cluster`. Additional columns (TODO!) represent algorithm parameters.
+A data frame of cluster results with columns `cell_id` and `cluster`. Additional columns represent algorithm parameters
+  and include at least: `algorithm`, `weighting`, and `nn`. Louvain and leiden clustering will also include `resolution`, and
+  leiden clustering will further include `objective_function`. Any other values passed to `cluster_args` will also be included.
 }
 \description{
 Calculate graph-based clusters from a provided matrix

From d7abb398331afbe498e54ed85d2982220a1b4198 Mon Sep 17 00:00:00 2001
From: "Stephanie J. Spielman" <stephanie.spielman@gmail.com>
Date: Mon, 9 Sep 2024 16:08:04 -0400
Subject: [PATCH 15/41] update test for all df columns

---
 .../tests/testthat/test-calculate-clusters.R  | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R b/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R
index 13a587d31..8f56857bf 100644
--- a/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R
+++ b/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R
@@ -11,7 +11,7 @@ test_that("calculate_clusters runs with defaults", {
 
   expect_equal(
     names(cluster_df),
-    c("cell_id", "cluster", "algorithm", "weighting", "nn")
+    c("cell_id", "cluster", "algorithm", "weighting", "nn", "resolution")
   )
   expect_equal(
     cluster_df$cell_id,
@@ -20,16 +20,16 @@ test_that("calculate_clusters runs with defaults", {
   expect_true(
     is.factor(cluster_df$cluster)
   )
-  expect_equal(
-    unique(cluster_df$algorithm),
-    "louvain"
+  expect_true(
+    all(cluster_df$algorithm == "louvain"),
   )
-  expect_equal(
-    unique(cluster_df$weighting),
-    "jaccard"
+  expect_true(
+    all(cluster_df$weighting == "jaccard")
   )
-  expect_equal(
-    unique(cluster_df$nn),
-    10
+  expect_true(
+    all(cluster_df$nn == 10)
+  )
+  expect_true(
+    all(cluster_df$resolution == 1)
   )
 })

From e4bf3de1590cdef89d9615178306b02873c77177 Mon Sep 17 00:00:00 2001
From: "Stephanie J. Spielman" <stephanie.spielman@gmail.com>
Date: Mon, 9 Sep 2024 16:33:18 -0400
Subject: [PATCH 16/41] dont need to handle errors with match.args

---
 packages/rOpenScPCA/R/calculate-clusters.R | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/packages/rOpenScPCA/R/calculate-clusters.R b/packages/rOpenScPCA/R/calculate-clusters.R
index 42ddfd4ea..8137365ec 100644
--- a/packages/rOpenScPCA/R/calculate-clusters.R
+++ b/packages/rOpenScPCA/R/calculate-clusters.R
@@ -24,7 +24,7 @@ calculate_clusters <- function(
     weighting = c("jaccard", "rank"),
     nn = 10,
     resolution = 1, # louvain or leiden
-    objective_function = "CPM", # leiden only
+    objective_function = c("CPM", "modularity"), # leiden only
     cluster_args = list(),
     seed = NULL) {
   if (!is.null(seed)) {
@@ -38,15 +38,12 @@ calculate_clusters <- function(
   )
 
   algorithm <- match.arg(algorithm)
-  stopifnot(
-    "`algorithm` must be one of 'louvain' (default), 'walktrap' or 'leiden'." =
-      algorithm %in% c("louvain", "walktrap", "leiden")
-  )
-
   weighting <- match.arg(weighting)
+  objective_function <- match.arg(objective_function)
+
   stopifnot(
-    "`weighting` must be one of 'jaccard' (default) or 'rank'." =
-      weighting %in% c("jaccard", "rank")
+    "`resolution` must be numeric" = is.numeric(resolution),
+    "`nn` must be numeric" = is.numeric(nn)
   )
 
   if (length(cluster_args) != 0) {

From 3498350678c4ea1fe3b5b43ae726d246e97b1ee9 Mon Sep 17 00:00:00 2001
From: "Stephanie J. Spielman" <stephanie.spielman@gmail.com>
Date: Mon, 9 Sep 2024 16:33:52 -0400
Subject: [PATCH 17/41] add tests to handle bad args, but not match.args ones

---
 .../tests/testthat/test-calculate-clusters.R  | 39 +++++++++++++++----
 1 file changed, 31 insertions(+), 8 deletions(-)

diff --git a/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R b/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R
index 8f56857bf..920618786 100644
--- a/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R
+++ b/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R
@@ -1,14 +1,13 @@
-test_that("calculate_clusters runs with defaults", {
-  test_mat <- matrix(
-    runif(1000, 0.1, 10),
-    nrow = 100,
-    ncol = 10
-  )
+test_mat <- matrix(
+  runif(1000, 0.1, 10),
+  nrow = 100,
+  ncol = 10
+)
+rownames(test_mat) <- as.character(1:100)
 
-  rownames(test_mat) <- 1:100
+test_that("calculate_clusters runs with defaults", {
   cluster_df <- calculate_clusters(test_mat)
 
-
   expect_equal(
     names(cluster_df),
     c("cell_id", "cluster", "algorithm", "weighting", "nn", "resolution")
@@ -33,3 +32,27 @@ test_that("calculate_clusters runs with defaults", {
     all(cluster_df$resolution == 1)
   )
 })
+
+
+test_that("calculate_clusters errors as expected", {
+  test_mat_nonames <- test_mat
+  rownames(test_mat_nonames) <- NULL
+
+  expect_error(
+    calculate_clusters(test_mat_nonames),
+    "The `mat` matrix must have row names representing cell ids e.g. barcodes."
+  )
+  expect_error(
+    calculate_clusters("not a matrix"),
+    "The `mat` argument must be a matrix."
+  )
+
+  expect_error(
+    calculate_clusters(test_mat, resolution = "string"),
+    "`resolution` must be numeric"
+  )
+  expect_error(
+    calculate_clusters(test_mat, nn = "string"),
+    "`nn` must be numeric"
+  )
+})

From 9db71daa550a4e165dd892791bc8e755ed125354 Mon Sep 17 00:00:00 2001
From: "Stephanie J. Spielman" <stephanie.spielman@gmail.com>
Date: Mon, 9 Sep 2024 16:42:57 -0400
Subject: [PATCH 18/41] add examples

---
 packages/rOpenScPCA/R/calculate-clusters.R | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/packages/rOpenScPCA/R/calculate-clusters.R b/packages/rOpenScPCA/R/calculate-clusters.R
index 8137365ec..3e44e6e07 100644
--- a/packages/rOpenScPCA/R/calculate-clusters.R
+++ b/packages/rOpenScPCA/R/calculate-clusters.R
@@ -7,7 +7,7 @@
 #' @param resolution Resolution parameter used by louvain and leiden clustering only. Default is 1.
 #' @param objective_function Leiden-specific parameter for whether to use the Constant Potts Model ("CPM"; default) or "modularity"
 #' @param seed Random seed to set for clustering. Default is 2024.
-#' @param cluster_args List of additional arguments to pass to the clustering function.
+#' @param cluster_args List of additional arguments to pass to the chosen clustering function. See igraph documentation for details: https://igraph.org/r/html/latest
 #'
 #' @return A data frame of cluster results with columns `cell_id` and `cluster`. Additional columns represent algorithm parameters
 #'   and include at least: `algorithm`, `weighting`, and `nn`. Louvain and leiden clustering will also include `resolution`, and
@@ -16,7 +16,18 @@
 #'
 #' @examples
 #' \dontrun{
-#' # TODO WILL ADD EXAMPLES ONCE ARGS ARE FINALIZED
+#' # cluster using default parameters
+#' cluster_df <- calculate_clusters(pca_matrix)
+#'
+#' # cluster using the leiden algorithm with a resolution of 0.1
+#' cluster_df <- calculate_clusters(pca_matrix, algorithm = "leiden", resolution = 0.1)
+#'
+#' # cluster using the leiden algorithm with a non-default of 3 iterations
+#' cluster_df <- calculate_clusters(
+#'   pca_matrix,
+#'   algorithm = "leiden",
+#'   cluster_args = list(n_iterations = 3)
+#' )
 #' }
 calculate_clusters <- function(
     mat,

From d576509791a27faa8a78f5e12c2af1fd04c114a1 Mon Sep 17 00:00:00 2001
From: "Stephanie J. Spielman" <stephanie.spielman@gmail.com>
Date: Mon, 9 Sep 2024 16:43:06 -0400
Subject: [PATCH 19/41] rerun document

---
 packages/rOpenScPCA/man/calculate_clusters.Rd | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/packages/rOpenScPCA/man/calculate_clusters.Rd b/packages/rOpenScPCA/man/calculate_clusters.Rd
index 4f6de42ba..c313d514e 100644
--- a/packages/rOpenScPCA/man/calculate_clusters.Rd
+++ b/packages/rOpenScPCA/man/calculate_clusters.Rd
@@ -10,7 +10,7 @@ calculate_clusters(
   weighting = c("jaccard", "rank"),
   nn = 10,
   resolution = 1,
-  objective_function = "CPM",
+  objective_function = c("CPM", "modularity"),
   cluster_args = list(),
   seed = NULL
 )
@@ -28,7 +28,7 @@ calculate_clusters(
 
 \item{objective_function}{Leiden-specific parameter for whether to use the Constant Potts Model ("CPM"; default) or "modularity"}
 
-\item{cluster_args}{List of additional arguments to pass to the clustering function.}
+\item{cluster_args}{List of additional arguments to pass to the chosen clustering function. See igraph documentation for details: https://igraph.org/r/html/latest}
 
 \item{seed}{Random seed to set for clustering. Default is 2024.}
 }
@@ -42,6 +42,18 @@ Calculate graph-based clusters from a provided matrix
 }
 \examples{
 \dontrun{
-# TODO WILL ADD EXAMPLES ONCE ARGS ARE FINALIZED
+# cluster using default parameters
+cluster_df <- calculate_clusters(pca_matrix)
+
+# cluster using the leiden algorithm with a resolution of 0.1
+cluster_df <- calculate_clusters(pca_matrix, algorithm = "leiden", resolution = 0.1)
+
+# cluster using the leiden algorithm with a non-default of 3 iterations
+cluster_df <- calculate_clusters(
+ pca_matrix,
+ algorithm = "leiden",
+ cluster_args = list(n_iterations = 3)
+)
+
 }
 }

From 52fd2d107c11026d00f4a9f1061c57fb5a711b77 Mon Sep 17 00:00:00 2001
From: "Stephanie J. Spielman" <stephanie.spielman@gmail.com>
Date: Mon, 9 Sep 2024 16:43:36 -0400
Subject: [PATCH 20/41] add tests for additional args

---
 .../tests/testthat/test-calculate-clusters.R  | 22 ++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R b/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R
index 920618786..9a9c3082b 100644
--- a/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R
+++ b/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R
@@ -20,7 +20,7 @@ test_that("calculate_clusters runs with defaults", {
     is.factor(cluster_df$cluster)
   )
   expect_true(
-    all(cluster_df$algorithm == "louvain"),
+    all(cluster_df$algorithm == "louvain")
   )
   expect_true(
     all(cluster_df$weighting == "jaccard")
@@ -34,6 +34,26 @@ test_that("calculate_clusters runs with defaults", {
 })
 
 
+test_that("calculate_clusters runs with additional cluster_args", {
+  cluster_df <- calculate_clusters(
+    test_mat,
+    algorithm = "leiden",
+    # the default is 2
+    cluster_args = list(n_iterations = 3)
+  )
+
+  expect_setequal(
+    names(cluster_df),
+    c("cell_id", "cluster", "algorithm", "weighting", "nn", "resolution", "objective_function", "n_iterations")
+  )
+  expect_true(
+    all(cluster_df$n_iterations == 3)
+  )
+})
+
+
+
+
 test_that("calculate_clusters errors as expected", {
   test_mat_nonames <- test_mat
   rownames(test_mat_nonames) <- NULL

From c11cedc445e368a3400bd060d49f604afabebb47 Mon Sep 17 00:00:00 2001
From: "Stephanie J. Spielman" <stephanie.spielman@gmail.com>
Date: Mon, 9 Sep 2024 16:46:14 -0400
Subject: [PATCH 21/41] add dplyr dep

---
 packages/rOpenScPCA/DESCRIPTION | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/packages/rOpenScPCA/DESCRIPTION b/packages/rOpenScPCA/DESCRIPTION
index 839d757e5..9da548377 100644
--- a/packages/rOpenScPCA/DESCRIPTION
+++ b/packages/rOpenScPCA/DESCRIPTION
@@ -23,4 +23,5 @@ Suggests:
 Config/testthat/edition: 3
 RoxygenNote: 7.3.2
 Imports: 
-    bluster
+    bluster,
+    dplyr

From e35bdf6f316916241a2f37afc9a7d13bb291aa92 Mon Sep 17 00:00:00 2001
From: "Stephanie J. Spielman" <stephanie.spielman@gmail.com>
Date: Mon, 9 Sep 2024 16:46:38 -0400
Subject: [PATCH 22/41] update error

---
 packages/rOpenScPCA/R/calculate-clusters.R                   | 2 +-
 packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/packages/rOpenScPCA/R/calculate-clusters.R b/packages/rOpenScPCA/R/calculate-clusters.R
index 3e44e6e07..9e16ec356 100644
--- a/packages/rOpenScPCA/R/calculate-clusters.R
+++ b/packages/rOpenScPCA/R/calculate-clusters.R
@@ -45,7 +45,7 @@ calculate_clusters <- function(
   # Check input arguments
   stopifnot(
     "The `mat` argument must be a matrix." = any(class(mat) %in% c("matrix", "Matrix")),
-    "The `mat` matrix must have row names representing cell ids (e.g. barcodes)." = !(is.null(rownames(mat)))
+    "The `mat` matrix must have row names representing cell ids, e.g. barcodes." = !(is.null(rownames(mat)))
   )
 
   algorithm <- match.arg(algorithm)
diff --git a/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R b/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R
index 9a9c3082b..e6bc4b87b 100644
--- a/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R
+++ b/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R
@@ -60,7 +60,7 @@ test_that("calculate_clusters errors as expected", {
 
   expect_error(
     calculate_clusters(test_mat_nonames),
-    "The `mat` matrix must have row names representing cell ids e.g. barcodes."
+    "The `mat` matrix must have row names representing cell ids, e.g. barcodes."
   )
   expect_error(
     calculate_clusters("not a matrix"),

From b724ebb3725b740b16d346ab26b1f281244135af Mon Sep 17 00:00:00 2001
From: "Stephanie J. Spielman" <stephanie.spielman@gmail.com>
Date: Mon, 9 Sep 2024 16:46:51 -0400
Subject: [PATCH 23/41] document

---
 packages/rOpenScPCA/man/calculate_clusters.Rd | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/packages/rOpenScPCA/man/calculate_clusters.Rd b/packages/rOpenScPCA/man/calculate_clusters.Rd
index c313d514e..f86960b10 100644
--- a/packages/rOpenScPCA/man/calculate_clusters.Rd
+++ b/packages/rOpenScPCA/man/calculate_clusters.Rd
@@ -50,10 +50,9 @@ cluster_df <- calculate_clusters(pca_matrix, algorithm = "leiden", resolution =
 
 # cluster using the leiden algorithm with a non-default of 3 iterations
 cluster_df <- calculate_clusters(
- pca_matrix,
- algorithm = "leiden",
- cluster_args = list(n_iterations = 3)
+  pca_matrix,
+  algorithm = "leiden",
+  cluster_args = list(n_iterations = 3)
 )
-
 }
 }

From 4d1a91f34e6ecddda533b7c580c17144a274a829 Mon Sep 17 00:00:00 2001
From: Stephanie Spielman <stephanie.spielman@gmail.com>
Date: Tue, 10 Sep 2024 11:21:00 -0400
Subject: [PATCH 24/41] Apply suggestions from code review

Co-authored-by: Joshua Shapiro <josh.shapiro@ccdatalab.org>
---
 packages/rOpenScPCA/R/calculate-clusters.R                  | 6 +++---
 .../rOpenScPCA/tests/testthat/test-calculate-clusters.R     | 6 ++----
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/packages/rOpenScPCA/R/calculate-clusters.R b/packages/rOpenScPCA/R/calculate-clusters.R
index 9e16ec356..bf72bce15 100644
--- a/packages/rOpenScPCA/R/calculate-clusters.R
+++ b/packages/rOpenScPCA/R/calculate-clusters.R
@@ -45,7 +45,7 @@ calculate_clusters <- function(
   # Check input arguments
   stopifnot(
     "The `mat` argument must be a matrix." = any(class(mat) %in% c("matrix", "Matrix")),
-    "The `mat` matrix must have row names representing cell ids, e.g. barcodes." = !(is.null(rownames(mat)))
+    "The `mat` matrix must have row names representing cell ids, e.g. barcodes." = is.character(rownames(mat))
   )
 
   algorithm <- match.arg(algorithm)
@@ -57,7 +57,7 @@ calculate_clusters <- function(
     "`nn` must be numeric" = is.numeric(nn)
   )
 
-  if (length(cluster_args) != 0) {
+  if (length(cluster_args)) {
     stopifnot(
       "`cluster_args` must be a named list." = is.list(cluster_args) && !(is.null(names(cluster_args)))
     )
@@ -65,7 +65,7 @@ calculate_clusters <- function(
 
   # Update cluster_args list with parameters that users can directly provide
   # note that clusterRows throws an error if this list has a param not used by the chosen algorithm
-  if (algorithm != "walktrap") {
+  if (algorithm %in% c("louvain", "leiden")) {
     cluster_args$resolution <- resolution
   }
   if (algorithm == "leiden") {
diff --git a/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R b/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R
index e6bc4b87b..dc0df73a7 100644
--- a/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R
+++ b/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R
@@ -1,5 +1,5 @@
 test_mat <- matrix(
-  runif(1000, 0.1, 10),
+  runif(1000, -3, 3),
   nrow = 100,
   ncol = 10
 )
@@ -16,9 +16,7 @@ test_that("calculate_clusters runs with defaults", {
     cluster_df$cell_id,
     as.character(1:100)
   )
-  expect_true(
-    is.factor(cluster_df$cluster)
-  )
+  expect_type(cluster_df$cluster, "factor")
   expect_true(
     all(cluster_df$algorithm == "louvain")
   )

From 4daa7e51b453b852adfd09a99c5260c18d94abce Mon Sep 17 00:00:00 2001
From: "Stephanie J. Spielman" <stephanie.spielman@gmail.com>
Date: Tue, 10 Sep 2024 11:30:50 -0400
Subject: [PATCH 25/41] remove .gitkeep again, got restored with the main merge

---
 packages/rOpenScPCA/R/.gitkeep | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 packages/rOpenScPCA/R/.gitkeep

diff --git a/packages/rOpenScPCA/R/.gitkeep b/packages/rOpenScPCA/R/.gitkeep
deleted file mode 100644
index e69de29bb..000000000

From 835b7eecff54c2fb273088c451f7aa451090f610 Mon Sep 17 00:00:00 2001
From: "Stephanie J. Spielman" <stephanie.spielman@gmail.com>
Date: Tue, 10 Sep 2024 11:35:59 -0400
Subject: [PATCH 26/41] expect_s3_class

---
 packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R b/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R
index dc0df73a7..6a6796cd8 100644
--- a/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R
+++ b/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R
@@ -16,7 +16,7 @@ test_that("calculate_clusters runs with defaults", {
     cluster_df$cell_id,
     as.character(1:100)
   )
-  expect_type(cluster_df$cluster, "factor")
+  expect_s3_class(cluster_df$cluster, "factor")
   expect_true(
     all(cluster_df$algorithm == "louvain")
   )

From d8f5b311676d5ded0ed94f2c41cacce29ba45f72 Mon Sep 17 00:00:00 2001
From: "Stephanie J. Spielman" <stephanie.spielman@gmail.com>
Date: Tue, 10 Sep 2024 11:36:38 -0400
Subject: [PATCH 27/41] combine stopifnots and update to allNames

---
 packages/rOpenScPCA/R/calculate-clusters.R | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/packages/rOpenScPCA/R/calculate-clusters.R b/packages/rOpenScPCA/R/calculate-clusters.R
index bf72bce15..89bcb5637 100644
--- a/packages/rOpenScPCA/R/calculate-clusters.R
+++ b/packages/rOpenScPCA/R/calculate-clusters.R
@@ -45,21 +45,18 @@ calculate_clusters <- function(
   # Check input arguments
   stopifnot(
     "The `mat` argument must be a matrix." = any(class(mat) %in% c("matrix", "Matrix")),
-    "The `mat` matrix must have row names representing cell ids, e.g. barcodes." = is.character(rownames(mat))
+    "The `mat` matrix must have row names representing cell ids, e.g. barcodes." = is.character(rownames(mat)),
+    "`resolution` must be numeric" = is.numeric(resolution),
+    "`nn` must be numeric" = is.numeric(nn)
   )
 
   algorithm <- match.arg(algorithm)
   weighting <- match.arg(weighting)
   objective_function <- match.arg(objective_function)
 
-  stopifnot(
-    "`resolution` must be numeric" = is.numeric(resolution),
-    "`nn` must be numeric" = is.numeric(nn)
-  )
-
   if (length(cluster_args)) {
     stopifnot(
-      "`cluster_args` must be a named list." = is.list(cluster_args) && !(is.null(names(cluster_args)))
+      "`cluster_args` must be a named list." = is.list(cluster_args) && !("" %in% allNames(cluster_args))
     )
   }
 

From 329180d6c634bbdff7f54781bc911673f6ab48e2 Mon Sep 17 00:00:00 2001
From: "Stephanie J. Spielman" <stephanie.spielman@gmail.com>
Date: Tue, 10 Sep 2024 11:37:25 -0400
Subject: [PATCH 28/41] dont check the specific error

---
 .../tests/testthat/test-calculate-clusters.R  | 21 ++++---------------
 1 file changed, 4 insertions(+), 17 deletions(-)

diff --git a/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R b/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R
index 6a6796cd8..a0f3d6bb1 100644
--- a/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R
+++ b/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R
@@ -56,21 +56,8 @@ test_that("calculate_clusters errors as expected", {
   test_mat_nonames <- test_mat
   rownames(test_mat_nonames) <- NULL
 
-  expect_error(
-    calculate_clusters(test_mat_nonames),
-    "The `mat` matrix must have row names representing cell ids, e.g. barcodes."
-  )
-  expect_error(
-    calculate_clusters("not a matrix"),
-    "The `mat` argument must be a matrix."
-  )
-
-  expect_error(
-    calculate_clusters(test_mat, resolution = "string"),
-    "`resolution` must be numeric"
-  )
-  expect_error(
-    calculate_clusters(test_mat, nn = "string"),
-    "`nn` must be numeric"
-  )
+  expect_error(calculate_clusters(test_mat_nonames))
+  expect_error(calculate_clusters("not a matrix"))
+  expect_error(calculate_clusters(test_mat, resolution = "string"))
+  expect_error(calculate_clusters(test_mat, nn = "string"))
 })

From 37d4973faa30aec4b55e4bcdef1def21419f181b Mon Sep 17 00:00:00 2001
From: Stephanie Spielman <stephanie.spielman@gmail.com>
Date: Tue, 10 Sep 2024 16:04:17 -0400
Subject: [PATCH 29/41] use expect_equal instead

---
 .../tests/testthat/test-calculate-clusters.R  | 32 ++++++++++++-------
 1 file changed, 21 insertions(+), 11 deletions(-)

diff --git a/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R b/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R
index a0f3d6bb1..d502399aa 100644
--- a/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R
+++ b/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R
@@ -16,18 +16,27 @@ test_that("calculate_clusters runs with defaults", {
     cluster_df$cell_id,
     as.character(1:100)
   )
-  expect_s3_class(cluster_df$cluster, "factor")
-  expect_true(
-    all(cluster_df$algorithm == "louvain")
+
+  expect_s3_class(
+    cluster_df$cluster,
+    "factor"
+  )
+
+  expect_equal(
+    unique(cluster_df$algorithm),
+    "louvain"
   )
-  expect_true(
-    all(cluster_df$weighting == "jaccard")
+  expect_equal(
+    unique(cluster_df$weighting),
+    "jaccard"
   )
-  expect_true(
-    all(cluster_df$nn == 10)
+  expect_equal(
+    unique(cluster_df$nn),
+    10
   )
-  expect_true(
-    all(cluster_df$resolution == 1)
+  expect_equal(
+    unique(cluster_df$resolution),
+    1
   )
 })
 
@@ -44,8 +53,9 @@ test_that("calculate_clusters runs with additional cluster_args", {
     names(cluster_df),
     c("cell_id", "cluster", "algorithm", "weighting", "nn", "resolution", "objective_function", "n_iterations")
   )
-  expect_true(
-    all(cluster_df$n_iterations == 3)
+  expect_equal(
+    unique(cluster_df$n_iterations),
+    3
   )
 })
 

From 662c33f3b8289e7210b058a420a7aa1df95f3730 Mon Sep 17 00:00:00 2001
From: Stephanie Spielman <stephanie.spielman@gmail.com>
Date: Tue, 10 Sep 2024 16:11:24 -0400
Subject: [PATCH 30/41] rm comment

---
 packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R | 1 -
 1 file changed, 1 deletion(-)

diff --git a/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R b/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R
index d502399aa..251612d0d 100644
--- a/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R
+++ b/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R
@@ -45,7 +45,6 @@ test_that("calculate_clusters runs with additional cluster_args", {
   cluster_df <- calculate_clusters(
     test_mat,
     algorithm = "leiden",
-    # the default is 2
     cluster_args = list(n_iterations = 3)
   )
 

From fe022d56bebf5d3a26661429bf93fe8f6d6dec46 Mon Sep 17 00:00:00 2001
From: Stephanie Spielman <stephanie.spielman@gmail.com>
Date: Tue, 10 Sep 2024 16:24:47 -0400
Subject: [PATCH 31/41] expand docs for non-default args, and add number
 weighting

---
 packages/rOpenScPCA/R/calculate-clusters.R    | 18 ++++++++++++------
 packages/rOpenScPCA/man/calculate_clusters.Rd | 10 +++++++---
 2 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/packages/rOpenScPCA/R/calculate-clusters.R b/packages/rOpenScPCA/R/calculate-clusters.R
index 89bcb5637..ceb47e198 100644
--- a/packages/rOpenScPCA/R/calculate-clusters.R
+++ b/packages/rOpenScPCA/R/calculate-clusters.R
@@ -1,17 +1,22 @@
 #' Calculate graph-based clusters from a provided matrix
 #'
 #' @param mat Matrix, usually of PCs, where each row is a cell. Matrix must have rownames of cell ids (e.g., barcodes)
-#' @param algorithm Clustering algorithm to use. Must be one of "louvain" (default), "walktrap", or "leiden"
-#' @param weighting Weighting scheme to use. Must be one of "jaccard" (default) or "rank"
+#' @param algorithm Clustering algorithm to use. Must be one of "louvain" (default), "walktrap", or "leiden".
+#'  Be aware that the default of "louvain" is different from the {bluster} package default of "walktrap". This difference is
+#'  because louvain clustering is more commonly-used in scRNA-seq analysis.
+#' @param weighting Weighting scheme to use. Must be one of "jaccard" (default), "rank", or "number"
+#'  Be aware that the default of "jaccard" is different from the {bluster} package default of "rank".
+#'  This difference is because jaccard weighting is more commonly-used in scRNA-seq analysis.
 #' @param nn Number of nearest neighbors. Default is 10.
 #' @param resolution Resolution parameter used by louvain and leiden clustering only. Default is 1.
 #' @param objective_function Leiden-specific parameter for whether to use the Constant Potts Model ("CPM"; default) or "modularity"
 #' @param seed Random seed to set for clustering. Default is 2024.
-#' @param cluster_args List of additional arguments to pass to the chosen clustering function. See igraph documentation for details: https://igraph.org/r/html/latest
-#'
+#' @param cluster_args List of additional arguments to pass to the chosen clustering function. Only single-length values will be used.
+#'   See igraph documentation for details on each clustering function: https://igraph.org/r/html/latest
 #' @return A data frame of cluster results with columns `cell_id` and `cluster`. Additional columns represent algorithm parameters
 #'   and include at least: `algorithm`, `weighting`, and `nn`. Louvain and leiden clustering will also include `resolution`, and
-#'   leiden clustering will further include `objective_function`. Any other values passed to `cluster_args` will also be included.
+#'   leiden clustering will further include `objective_function`.
+#'
 #' @export
 #'
 #' @examples
@@ -32,7 +37,7 @@
 calculate_clusters <- function(
     mat,
     algorithm = c("louvain", "walktrap", "leiden"),
-    weighting = c("jaccard", "rank"),
+    weighting = c("jaccard", "rank", "number"),
     nn = 10,
     resolution = 1, # louvain or leiden
     objective_function = c("CPM", "modularity"), # leiden only
@@ -60,6 +65,7 @@ calculate_clusters <- function(
     )
   }
 
+
   # Update cluster_args list with parameters that users can directly provide
   # note that clusterRows throws an error if this list has a param not used by the chosen algorithm
   if (algorithm %in% c("louvain", "leiden")) {
diff --git a/packages/rOpenScPCA/man/calculate_clusters.Rd b/packages/rOpenScPCA/man/calculate_clusters.Rd
index f86960b10..b94cb48a3 100644
--- a/packages/rOpenScPCA/man/calculate_clusters.Rd
+++ b/packages/rOpenScPCA/man/calculate_clusters.Rd
@@ -7,7 +7,7 @@
 calculate_clusters(
   mat,
   algorithm = c("louvain", "walktrap", "leiden"),
-  weighting = c("jaccard", "rank"),
+  weighting = c("jaccard", "rank", "number"),
   nn = 10,
   resolution = 1,
   objective_function = c("CPM", "modularity"),
@@ -18,9 +18,13 @@ calculate_clusters(
 \arguments{
 \item{mat}{Matrix, usually of PCs, where each row is a cell. Matrix must have rownames of cell ids (e.g., barcodes)}
 
-\item{algorithm}{Clustering algorithm to use. Must be one of "louvain" (default), "walktrap", or "leiden"}
+\item{algorithm}{Clustering algorithm to use. Must be one of "louvain" (default), "walktrap", or "leiden".
+Be aware that the default of "louvain" is different from the {bluster} package default of "walktrap". This difference is
+because louvain clustering is more commonly-used in scRNA-seq analysis.}
 
-\item{weighting}{Weighting scheme to use. Must be one of "jaccard" (default) or "rank"}
+\item{weighting}{Weighting scheme to use. Must be one of "jaccard" (default), "rank", or "number"
+Be aware that the default of "jaccard" is different from the {bluster} package default of "rank".
+This difference is because jaccard weighting is more commonly-used in scRNA-seq analysis.}
 
 \item{nn}{Number of nearest neighbors. Default is 10.}
 

From 7075b172c4c278db2ce60671dab1217595159759 Mon Sep 17 00:00:00 2001
From: Stephanie Spielman <stephanie.spielman@gmail.com>
Date: Tue, 10 Sep 2024 16:42:08 -0400
Subject: [PATCH 32/41] add test for single length args

---
 .../tests/testthat/test-calculate-clusters.R     | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R b/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R
index 251612d0d..ee7f9c27c 100644
--- a/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R
+++ b/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R
@@ -70,3 +70,19 @@ test_that("calculate_clusters errors as expected", {
   expect_error(calculate_clusters(test_mat, resolution = "string"))
   expect_error(calculate_clusters(test_mat, nn = "string"))
 })
+
+
+
+test_that("calculate_clusters handles cluster_args with length > 1", {
+  expect_warning(
+    cluster_df <- calculate_clusters(
+      test_mat,
+      cluster_args = list(too_long = 1:10)
+    )
+  )
+
+  expect_equal(
+    names(cluster_df),
+    c("cell_id", "cluster", "algorithm", "weighting", "nn", "resolution")
+  )
+})

From c89e3b9702a5b02caf7679641772512f0428ccb0 Mon Sep 17 00:00:00 2001
From: Stephanie Spielman <stephanie.spielman@gmail.com>
Date: Tue, 10 Sep 2024 16:42:36 -0400
Subject: [PATCH 33/41] update DESCRIPTION and check appeasement

---
 packages/rOpenScPCA/DESCRIPTION               |  4 +++-
 packages/rOpenScPCA/R/calculate-clusters.R    | 17 ++++++++++++++---
 packages/rOpenScPCA/man/calculate_clusters.Rd |  9 +++++----
 3 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/packages/rOpenScPCA/DESCRIPTION b/packages/rOpenScPCA/DESCRIPTION
index 9da548377..2574da715 100644
--- a/packages/rOpenScPCA/DESCRIPTION
+++ b/packages/rOpenScPCA/DESCRIPTION
@@ -24,4 +24,6 @@ Config/testthat/edition: 3
 RoxygenNote: 7.3.2
 Imports: 
     bluster,
-    dplyr
+    dplyr,
+    methods,
+    purrr
diff --git a/packages/rOpenScPCA/R/calculate-clusters.R b/packages/rOpenScPCA/R/calculate-clusters.R
index ceb47e198..c5c8d5a31 100644
--- a/packages/rOpenScPCA/R/calculate-clusters.R
+++ b/packages/rOpenScPCA/R/calculate-clusters.R
@@ -2,10 +2,10 @@
 #'
 #' @param mat Matrix, usually of PCs, where each row is a cell. Matrix must have rownames of cell ids (e.g., barcodes)
 #' @param algorithm Clustering algorithm to use. Must be one of "louvain" (default), "walktrap", or "leiden".
-#'  Be aware that the default of "louvain" is different from the {bluster} package default of "walktrap". This difference is
+#'  Be aware that the default of "louvain" is different from the bluster package default of "walktrap". This difference is
 #'  because louvain clustering is more commonly-used in scRNA-seq analysis.
 #' @param weighting Weighting scheme to use. Must be one of "jaccard" (default), "rank", or "number"
-#'  Be aware that the default of "jaccard" is different from the {bluster} package default of "rank".
+#'  Be aware that the default of "jaccard" is different from the bluster package default of "rank".
 #'  This difference is because jaccard weighting is more commonly-used in scRNA-seq analysis.
 #' @param nn Number of nearest neighbors. Default is 10.
 #' @param resolution Resolution parameter used by louvain and leiden clustering only. Default is 1.
@@ -61,10 +61,21 @@ calculate_clusters <- function(
 
   if (length(cluster_args)) {
     stopifnot(
-      "`cluster_args` must be a named list." = is.list(cluster_args) && !("" %in% allNames(cluster_args))
+      "`cluster_args` must be a named list." = is.list(cluster_args) && !("" %in% methods::allNames(cluster_args))
     )
   }
 
+  # Do not use any arguments whose length is >1
+  if (any(sapply(cluster_args, length) > 1)) {
+    warning("Only single-length values in 'cluster_args' will be used.")
+
+    cluster_args <- cluster_args |>
+      purrr::keep(
+        \(arg) {
+          length(arg) == 1
+        }
+      )
+  }
 
   # Update cluster_args list with parameters that users can directly provide
   # note that clusterRows throws an error if this list has a param not used by the chosen algorithm
diff --git a/packages/rOpenScPCA/man/calculate_clusters.Rd b/packages/rOpenScPCA/man/calculate_clusters.Rd
index b94cb48a3..669b67725 100644
--- a/packages/rOpenScPCA/man/calculate_clusters.Rd
+++ b/packages/rOpenScPCA/man/calculate_clusters.Rd
@@ -19,11 +19,11 @@ calculate_clusters(
 \item{mat}{Matrix, usually of PCs, where each row is a cell. Matrix must have rownames of cell ids (e.g., barcodes)}
 
 \item{algorithm}{Clustering algorithm to use. Must be one of "louvain" (default), "walktrap", or "leiden".
-Be aware that the default of "louvain" is different from the {bluster} package default of "walktrap". This difference is
+Be aware that the default of "louvain" is different from the bluster package default of "walktrap". This difference is
 because louvain clustering is more commonly-used in scRNA-seq analysis.}
 
 \item{weighting}{Weighting scheme to use. Must be one of "jaccard" (default), "rank", or "number"
-Be aware that the default of "jaccard" is different from the {bluster} package default of "rank".
+Be aware that the default of "jaccard" is different from the bluster package default of "rank".
 This difference is because jaccard weighting is more commonly-used in scRNA-seq analysis.}
 
 \item{nn}{Number of nearest neighbors. Default is 10.}
@@ -32,14 +32,15 @@ This difference is because jaccard weighting is more commonly-used in scRNA-seq
 
 \item{objective_function}{Leiden-specific parameter for whether to use the Constant Potts Model ("CPM"; default) or "modularity"}
 
-\item{cluster_args}{List of additional arguments to pass to the chosen clustering function. See igraph documentation for details: https://igraph.org/r/html/latest}
+\item{cluster_args}{List of additional arguments to pass to the chosen clustering function. Only single-length values will be used.
+See igraph documentation for details on each clustering function: https://igraph.org/r/html/latest}
 
 \item{seed}{Random seed to set for clustering. Default is 2024.}
 }
 \value{
 A data frame of cluster results with columns `cell_id` and `cluster`. Additional columns represent algorithm parameters
   and include at least: `algorithm`, `weighting`, and `nn`. Louvain and leiden clustering will also include `resolution`, and
-  leiden clustering will further include `objective_function`. Any other values passed to `cluster_args` will also be included.
+  leiden clustering will further include `objective_function`.
 }
 \description{
 Calculate graph-based clusters from a provided matrix

From e536f021ce4b045a2e78a8e130c6b44d2092d321 Mon Sep 17 00:00:00 2001
From: Stephanie Spielman <stephanie.spielman@gmail.com>
Date: Tue, 10 Sep 2024 16:44:39 -0400
Subject: [PATCH 34/41] Update packages/rOpenScPCA/R/calculate-clusters.R

Co-authored-by: Joshua Shapiro <josh.shapiro@ccdatalab.org>
---
 packages/rOpenScPCA/R/calculate-clusters.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packages/rOpenScPCA/R/calculate-clusters.R b/packages/rOpenScPCA/R/calculate-clusters.R
index c5c8d5a31..ee60ac20b 100644
--- a/packages/rOpenScPCA/R/calculate-clusters.R
+++ b/packages/rOpenScPCA/R/calculate-clusters.R
@@ -108,7 +108,7 @@ calculate_clusters <- function(
     nn = nn
   ) |>
     dplyr::bind_cols(
-      dplyr::bind_rows(cluster_args)
+      data.frame(cluster_args)
     )
 
   return(cluster_df)

From bcc92547192b5f174fdc7c1562f477f64d5c2464 Mon Sep 17 00:00:00 2001
From: Stephanie Spielman <stephanie.spielman@gmail.com>
Date: Wed, 11 Sep 2024 10:44:40 -0400
Subject: [PATCH 35/41] Apply suggestions from code review

Co-authored-by: Joshua Shapiro <josh.shapiro@ccdatalab.org>
---
 packages/rOpenScPCA/R/calculate-clusters.R | 20 +++++++-------------
 1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/packages/rOpenScPCA/R/calculate-clusters.R b/packages/rOpenScPCA/R/calculate-clusters.R
index ee60ac20b..32f63edc9 100644
--- a/packages/rOpenScPCA/R/calculate-clusters.R
+++ b/packages/rOpenScPCA/R/calculate-clusters.R
@@ -1,5 +1,10 @@
 #' Calculate graph-based clusters from a provided matrix
 #'
+#' This function is provided to simplify application of some bluster functions for OpenScPCA data.
+#' Note that defaults for some arguments may differ from the bluster defaults.
+#' In particular, the clustering algorithm defaults to "louvain" and the distance measure to "jaccard" 
+#' to align with common practice. 
+#'
 #' @param mat Matrix, usually of PCs, where each row is a cell. Matrix must have rownames of cell ids (e.g., barcodes)
 #' @param algorithm Clustering algorithm to use. Must be one of "louvain" (default), "walktrap", or "leiden".
 #'  Be aware that the default of "louvain" is different from the bluster package default of "walktrap". This difference is
@@ -61,22 +66,11 @@ calculate_clusters <- function(
 
   if (length(cluster_args)) {
     stopifnot(
-      "`cluster_args` must be a named list." = is.list(cluster_args) && !("" %in% methods::allNames(cluster_args))
+      "`cluster_args` must be a named list." = is.list(cluster_args) && !("" %in% methods::allNames(cluster_args)),
+      "`cluster_args` elements must all have only a single value" = all(sapply(cluster_args, length) == 1))
     )
   }
 
-  # Do not use any arguments whose length is >1
-  if (any(sapply(cluster_args, length) > 1)) {
-    warning("Only single-length values in 'cluster_args' will be used.")
-
-    cluster_args <- cluster_args |>
-      purrr::keep(
-        \(arg) {
-          length(arg) == 1
-        }
-      )
-  }
-
   # Update cluster_args list with parameters that users can directly provide
   # note that clusterRows throws an error if this list has a param not used by the chosen algorithm
   if (algorithm %in% c("louvain", "leiden")) {

From d904171b3abebab0bbe88ccac1c2d6172322c516 Mon Sep 17 00:00:00 2001
From: Stephanie Spielman <stephanie.spielman@gmail.com>
Date: Wed, 11 Sep 2024 10:48:46 -0400
Subject: [PATCH 36/41] docs and syntax updates

---
 packages/rOpenScPCA/R/calculate-clusters.R    | 15 ++++++---------
 packages/rOpenScPCA/man/calculate_clusters.Rd | 14 +++++++-------
 2 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/packages/rOpenScPCA/R/calculate-clusters.R b/packages/rOpenScPCA/R/calculate-clusters.R
index 32f63edc9..8e06cab61 100644
--- a/packages/rOpenScPCA/R/calculate-clusters.R
+++ b/packages/rOpenScPCA/R/calculate-clusters.R
@@ -1,17 +1,14 @@
 #' Calculate graph-based clusters from a provided matrix
 #'
-#' This function is provided to simplify application of some bluster functions for OpenScPCA data.
-#' Note that defaults for some arguments may differ from the bluster defaults.
-#' In particular, the clustering algorithm defaults to "louvain" and the distance measure to "jaccard" 
-#' to align with common practice. 
+#' This function is provided to simplify application of bluster package clustering functions on OpenScPCA data.
+#' In particular, this function runs bluster::clusterRows() with the bluster::NNGraphParam() function.
+#' Note that defaults for some arguments may differ from the bluster::NNGraphParam() defaults.
+#' Specifically, the clustering algorithm defaults to "louvain" and the distance measure to "jaccard"
+#' to align with common practice in scRNA-seq anaylsis.
 #'
 #' @param mat Matrix, usually of PCs, where each row is a cell. Matrix must have rownames of cell ids (e.g., barcodes)
 #' @param algorithm Clustering algorithm to use. Must be one of "louvain" (default), "walktrap", or "leiden".
-#'  Be aware that the default of "louvain" is different from the bluster package default of "walktrap". This difference is
-#'  because louvain clustering is more commonly-used in scRNA-seq analysis.
 #' @param weighting Weighting scheme to use. Must be one of "jaccard" (default), "rank", or "number"
-#'  Be aware that the default of "jaccard" is different from the bluster package default of "rank".
-#'  This difference is because jaccard weighting is more commonly-used in scRNA-seq analysis.
 #' @param nn Number of nearest neighbors. Default is 10.
 #' @param resolution Resolution parameter used by louvain and leiden clustering only. Default is 1.
 #' @param objective_function Leiden-specific parameter for whether to use the Constant Potts Model ("CPM"; default) or "modularity"
@@ -67,7 +64,7 @@ calculate_clusters <- function(
   if (length(cluster_args)) {
     stopifnot(
       "`cluster_args` must be a named list." = is.list(cluster_args) && !("" %in% methods::allNames(cluster_args)),
-      "`cluster_args` elements must all have only a single value" = all(sapply(cluster_args, length) == 1))
+      "`cluster_args` elements must all have only a single value" = all(sapply(cluster_args, length) == 1)
     )
   }
 
diff --git a/packages/rOpenScPCA/man/calculate_clusters.Rd b/packages/rOpenScPCA/man/calculate_clusters.Rd
index 669b67725..931d5950e 100644
--- a/packages/rOpenScPCA/man/calculate_clusters.Rd
+++ b/packages/rOpenScPCA/man/calculate_clusters.Rd
@@ -18,13 +18,9 @@ calculate_clusters(
 \arguments{
 \item{mat}{Matrix, usually of PCs, where each row is a cell. Matrix must have rownames of cell ids (e.g., barcodes)}
 
-\item{algorithm}{Clustering algorithm to use. Must be one of "louvain" (default), "walktrap", or "leiden".
-Be aware that the default of "louvain" is different from the bluster package default of "walktrap". This difference is
-because louvain clustering is more commonly-used in scRNA-seq analysis.}
+\item{algorithm}{Clustering algorithm to use. Must be one of "louvain" (default), "walktrap", or "leiden".}
 
-\item{weighting}{Weighting scheme to use. Must be one of "jaccard" (default), "rank", or "number"
-Be aware that the default of "jaccard" is different from the bluster package default of "rank".
-This difference is because jaccard weighting is more commonly-used in scRNA-seq analysis.}
+\item{weighting}{Weighting scheme to use. Must be one of "jaccard" (default), "rank", or "number"}
 
 \item{nn}{Number of nearest neighbors. Default is 10.}
 
@@ -43,7 +39,11 @@ A data frame of cluster results with columns `cell_id` and `cluster`. Additional
   leiden clustering will further include `objective_function`.
 }
 \description{
-Calculate graph-based clusters from a provided matrix
+This function is provided to simplify application of bluster package clustering functions on OpenScPCA data.
+In particular, this function runs bluster::clusterRows() with the bluster::NNGraphParam() function.
+Note that defaults for some arguments may differ from the bluster::NNGraphParam() defaults.
+Specifically, the clustering algorithm defaults to "louvain" and the distance measure to "jaccard"
+to align with common practice in scRNA-seq anaylsis.
 }
 \examples{
 \dontrun{

From 0cdfe03269c5ad82ada0f67a17dacd1a2383ed19 Mon Sep 17 00:00:00 2001
From: Stephanie Spielman <stephanie.spielman@gmail.com>
Date: Wed, 11 Sep 2024 10:49:41 -0400
Subject: [PATCH 37/41] remove purrr dependency

---
 packages/rOpenScPCA/DESCRIPTION | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/packages/rOpenScPCA/DESCRIPTION b/packages/rOpenScPCA/DESCRIPTION
index 2574da715..ff59937d6 100644
--- a/packages/rOpenScPCA/DESCRIPTION
+++ b/packages/rOpenScPCA/DESCRIPTION
@@ -25,5 +25,4 @@ RoxygenNote: 7.3.2
 Imports: 
     bluster,
     dplyr,
-    methods,
-    purrr
+    methods

From e7b22cc6f1bc488fceab90571a4c2ffcf75ba413 Mon Sep 17 00:00:00 2001
From: Stephanie Spielman <stephanie.spielman@gmail.com>
Date: Wed, 11 Sep 2024 10:49:59 -0400
Subject: [PATCH 38/41] update test to expect_error

---
 .../tests/testthat/test-calculate-clusters.R       | 14 ++------------
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R b/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R
index ee7f9c27c..b150c4e8d 100644
--- a/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R
+++ b/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R
@@ -69,20 +69,10 @@ test_that("calculate_clusters errors as expected", {
   expect_error(calculate_clusters("not a matrix"))
   expect_error(calculate_clusters(test_mat, resolution = "string"))
   expect_error(calculate_clusters(test_mat, nn = "string"))
-})
-
-
-
-test_that("calculate_clusters handles cluster_args with length > 1", {
-  expect_warning(
-    cluster_df <- calculate_clusters(
+  expect_error(
+    calculate_clusters(
       test_mat,
       cluster_args = list(too_long = 1:10)
     )
   )
-
-  expect_equal(
-    names(cluster_df),
-    c("cell_id", "cluster", "algorithm", "weighting", "nn", "resolution")
-  )
 })

From b2b4cbe08a5ef360f3303e4231223fb9da1c45e6 Mon Sep 17 00:00:00 2001
From: Stephanie Spielman <stephanie.spielman@gmail.com>
Date: Wed, 11 Sep 2024 13:46:59 -0400
Subject: [PATCH 39/41] Apply suggestions from code review

Co-authored-by: Joshua Shapiro <josh.shapiro@ccdatalab.org>
---
 packages/rOpenScPCA/R/calculate-clusters.R | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/packages/rOpenScPCA/R/calculate-clusters.R b/packages/rOpenScPCA/R/calculate-clusters.R
index 8e06cab61..83c76f767 100644
--- a/packages/rOpenScPCA/R/calculate-clusters.R
+++ b/packages/rOpenScPCA/R/calculate-clusters.R
@@ -13,8 +13,10 @@
 #' @param resolution Resolution parameter used by louvain and leiden clustering only. Default is 1.
 #' @param objective_function Leiden-specific parameter for whether to use the Constant Potts Model ("CPM"; default) or "modularity"
 #' @param seed Random seed to set for clustering. Default is 2024.
-#' @param cluster_args List of additional arguments to pass to the chosen clustering function. Only single-length values will be used.
+#' @param cluster_args List of additional arguments to pass to the chosen clustering function. 
+#'   Only single values for each argument are supported (no vectors or lists).
 #'   See igraph documentation for details on each clustering function: https://igraph.org/r/html/latest
+#'
 #' @return A data frame of cluster results with columns `cell_id` and `cluster`. Additional columns represent algorithm parameters
 #'   and include at least: `algorithm`, `weighting`, and `nn`. Louvain and leiden clustering will also include `resolution`, and
 #'   leiden clustering will further include `objective_function`.

From b0a90657b7d1c3dc9e50e3ab5418b3139f4d8e8a Mon Sep 17 00:00:00 2001
From: Stephanie Spielman <stephanie.spielman@gmail.com>
Date: Wed, 11 Sep 2024 13:52:03 -0400
Subject: [PATCH 40/41] wording update to be consistent with bluster docs
 wording

---
 packages/rOpenScPCA/R/calculate-clusters.R | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/packages/rOpenScPCA/R/calculate-clusters.R b/packages/rOpenScPCA/R/calculate-clusters.R
index 83c76f767..b457562eb 100644
--- a/packages/rOpenScPCA/R/calculate-clusters.R
+++ b/packages/rOpenScPCA/R/calculate-clusters.R
@@ -3,8 +3,8 @@
 #' This function is provided to simplify application of bluster package clustering functions on OpenScPCA data.
 #' In particular, this function runs bluster::clusterRows() with the bluster::NNGraphParam() function.
 #' Note that defaults for some arguments may differ from the bluster::NNGraphParam() defaults.
-#' Specifically, the clustering algorithm defaults to "louvain" and the distance measure to "jaccard"
-#' to align with common practice in scRNA-seq anaylsis.
+#' Specifically, the clustering algorithm defaults to "louvain" and the weighting scheme to "jaccard"
+#' to align with common practice in scRNA-seq analysis.
 #'
 #' @param mat Matrix, usually of PCs, where each row is a cell. Matrix must have rownames of cell ids (e.g., barcodes)
 #' @param algorithm Clustering algorithm to use. Must be one of "louvain" (default), "walktrap", or "leiden".
@@ -13,7 +13,7 @@
 #' @param resolution Resolution parameter used by louvain and leiden clustering only. Default is 1.
 #' @param objective_function Leiden-specific parameter for whether to use the Constant Potts Model ("CPM"; default) or "modularity"
 #' @param seed Random seed to set for clustering. Default is 2024.
-#' @param cluster_args List of additional arguments to pass to the chosen clustering function. 
+#' @param cluster_args List of additional arguments to pass to the chosen clustering function.
 #'   Only single values for each argument are supported (no vectors or lists).
 #'   See igraph documentation for details on each clustering function: https://igraph.org/r/html/latest
 #'

From 9a8a4b12c0b0aae3e1432758612ea072b7acb439 Mon Sep 17 00:00:00 2001
From: Stephanie Spielman <stephanie.spielman@gmail.com>
Date: Wed, 11 Sep 2024 13:52:09 -0400
Subject: [PATCH 41/41] document

---
 packages/rOpenScPCA/man/calculate_clusters.Rd | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/packages/rOpenScPCA/man/calculate_clusters.Rd b/packages/rOpenScPCA/man/calculate_clusters.Rd
index 931d5950e..3006b6ba6 100644
--- a/packages/rOpenScPCA/man/calculate_clusters.Rd
+++ b/packages/rOpenScPCA/man/calculate_clusters.Rd
@@ -28,7 +28,8 @@ calculate_clusters(
 
 \item{objective_function}{Leiden-specific parameter for whether to use the Constant Potts Model ("CPM"; default) or "modularity"}
 
-\item{cluster_args}{List of additional arguments to pass to the chosen clustering function. Only single-length values will be used.
+\item{cluster_args}{List of additional arguments to pass to the chosen clustering function.
+Only single values for each argument are supported (no vectors or lists).
 See igraph documentation for details on each clustering function: https://igraph.org/r/html/latest}
 
 \item{seed}{Random seed to set for clustering. Default is 2024.}
@@ -42,8 +43,8 @@ A data frame of cluster results with columns `cell_id` and `cluster`. Additional
 This function is provided to simplify application of bluster package clustering functions on OpenScPCA data.
 In particular, this function runs bluster::clusterRows() with the bluster::NNGraphParam() function.
 Note that defaults for some arguments may differ from the bluster::NNGraphParam() defaults.
-Specifically, the clustering algorithm defaults to "louvain" and the distance measure to "jaccard"
-to align with common practice in scRNA-seq anaylsis.
+Specifically, the clustering algorithm defaults to "louvain" and the weighting scheme to "jaccard"
+to align with common practice in scRNA-seq analysis.
 }
 \examples{
 \dontrun{