From 5fba01770cc5d42653eb300bbec9f3a0a03420b6 Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Tue, 3 Sep 2024 14:23:29 -0400 Subject: [PATCH 01/41] initial clustering function --- packages/rOpenScPCA/R/calculate-clusters.R | 68 ++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 packages/rOpenScPCA/R/calculate-clusters.R diff --git a/packages/rOpenScPCA/R/calculate-clusters.R b/packages/rOpenScPCA/R/calculate-clusters.R new file mode 100644 index 000000000..febd489fa --- /dev/null +++ b/packages/rOpenScPCA/R/calculate-clusters.R @@ -0,0 +1,68 @@ +calculate_clusters <- function( + mat, + algorithm = "louvain", + weighting = "jaccard", + nn = 10, + resolution = 1, # leiden and louvain + objective_function = "CPM", # leiden only + random_seed = 2024, + cluster_args = list()) { + set.seed(random_seed) + + # Check input arguments + stopifnot( + "The `mat` argument must be a matrix." = class(mat) %in% c("Matrix", "dgCMatrix"), + "The `mat` matrix must be numeric." = type(mat) %in% c("numeric", "double"), + "The `mat` matrix must have row names representing cell ids (e.g. barcodes)." = !(is.null(rownames(mat))) + ) + + algorithm <- toLower(algorithm) + stopifnot( + "`algorithm` must be one of 'louvain' (default), 'walktrap' or 'leiden'." = + algorithm %in% c("louvain", "walktrap", "leiden") + ) + + weighting <- toLower(weighting) + stopifnot( + "`weighting` must be one of 'jaccard' (default) or 'rank'." = + weighting %in% c("jaccard", "rank", "leiden") + ) + + # TODO: consider adding more specific checks here? + stopifnot( + "`cluster_args` must be a list." = class(cluster_args) == "list" + ) + + # Update cluster_args list with settings that users can directly provide + if (algorithm != "walktrap") { + cluster_args$resolution <- resolution + } + if (algorithm == "leiden") { + cluster_args$resolution <- objective_function + } + + + # Perform clustering + clusters <- bluster::clusterRows( + mat, + bluster::NNGraphParam( + k = nn, + type = weighting, + cluster.fun = algorithm, + cluster.args = cluster_args + ) + ) + + + # Transform results into a table and return + # TODO: Should this have _all_ parameters in cluster.args? + return( + tibble::tibble( + barcode = rownames(mat), + cluster = clusters, + algorithm = algorithm, + weighting = weighting, + nn = nn + ) + ) +} From 116c73d54924b1da2d0373bb0c2bbc54f2844a06 Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Tue, 3 Sep 2024 14:30:58 -0400 Subject: [PATCH 02/41] add bluster and roxygen. Update column name to cell_id --- packages/rOpenScPCA/DESCRIPTION | 2 + packages/rOpenScPCA/R/calculate-clusters.R | 20 ++++++++- packages/rOpenScPCA/man/calculate_clusters.Rd | 45 +++++++++++++++++++ 3 files changed, 66 insertions(+), 1 deletion(-) create mode 100644 packages/rOpenScPCA/man/calculate_clusters.Rd diff --git a/packages/rOpenScPCA/DESCRIPTION b/packages/rOpenScPCA/DESCRIPTION index fa2c4f557..839d757e5 100644 --- a/packages/rOpenScPCA/DESCRIPTION +++ b/packages/rOpenScPCA/DESCRIPTION @@ -22,3 +22,5 @@ Suggests: testthat (>= 3.0.0) Config/testthat/edition: 3 RoxygenNote: 7.3.2 +Imports: + bluster diff --git a/packages/rOpenScPCA/R/calculate-clusters.R b/packages/rOpenScPCA/R/calculate-clusters.R index febd489fa..3b8a9a184 100644 --- a/packages/rOpenScPCA/R/calculate-clusters.R +++ b/packages/rOpenScPCA/R/calculate-clusters.R @@ -1,3 +1,21 @@ +#' Calculate graph-based clusters from a provided matrix +#' +#' @param mat Matrix, usually of PCs, where each row is a cell. Matrix must have rownames of cell ids (e.g., barcodes) +#' @param algorithm Clustering algorithm to use. Must be one of "louvain" (default), "walktrap", or "leiden" +#' @param weighting Weighting scheme to use. Must be one of "jaccard" (default) or "rank" +#' @param nn Number of nearest neighbors. Default is 10. +#' @param resolution Resolution parameter used by louvain and leiden clustering only. Default is 1. +#' @param objective_function Leiden-specific parameter for whether to use the Constant Potts Model ("CPM"; default) or "modularity" +#' @param random_seed Random seed to set for clustering. Default is 2024. +#' @param cluster_args List of additional arguments to pass to the clustering function. +#' +#' @return A data frame of cluster results with columns `cell_id` and `cluster`. Additional columns represent algorithm parameters. +#' @export +#' +#' @examples +#' \dontrun{ +#' # TODO WILL ADD EXAMPLES ONCE ARGS ARE FINALIZED +#' } calculate_clusters <- function( mat, algorithm = "louvain", @@ -58,7 +76,7 @@ calculate_clusters <- function( # TODO: Should this have _all_ parameters in cluster.args? return( tibble::tibble( - barcode = rownames(mat), + cell_id = rownames(mat), cluster = clusters, algorithm = algorithm, weighting = weighting, diff --git a/packages/rOpenScPCA/man/calculate_clusters.Rd b/packages/rOpenScPCA/man/calculate_clusters.Rd new file mode 100644 index 000000000..38cb178b8 --- /dev/null +++ b/packages/rOpenScPCA/man/calculate_clusters.Rd @@ -0,0 +1,45 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/calculate-clusters.R +\name{calculate_clusters} +\alias{calculate_clusters} +\title{Calculate graph-based clusters from a provided matrix} +\usage{ +calculate_clusters( + mat, + algorithm = "louvain", + weighting = "jaccard", + nn = 10, + resolution = 1, + objective_function = "CPM", + random_seed = 2024, + cluster_args = list() +) +} +\arguments{ +\item{mat}{Matrix, usually of PCs, where each row is a cell. Matrix must have rownames of cell ids (e.g., barcodes)} + +\item{algorithm}{Clustering algorithm to use. Must be one of "louvain" (default), "walktrap", or "leiden"} + +\item{weighting}{Weighting scheme to use. Must be one of "jaccard" (default) or "rank"} + +\item{nn}{Number of nearest neighbors. Default is 10.} + +\item{resolution}{Resolution parameter used by louvain and leiden clustering only. Default is 1.} + +\item{objective_function}{Leiden-specific parameter for whether to use the Constant Potts Model ("CPM"; default) or "modularity"} + +\item{random_seed}{Random seed to set for clustering. Default is 2024.} + +\item{cluster_args}{List of additional arguments to pass to the clustering function.} +} +\value{ +A data frame of cluster results with columns `cell_id` and `cluster`. Additional columns represent algorithm parameters. +} +\description{ +Calculate graph-based clusters from a provided matrix +} +\examples{ +\dontrun{ + # TODO WILL ADD EXAMPLES ONCE ARGS ARE FINALIZED +} +} From 3ad4f4961a4298d71cae688673c15f3f6986bd25 Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Tue, 3 Sep 2024 14:49:22 -0400 Subject: [PATCH 03/41] implement test of defaults and debug --- packages/rOpenScPCA/R/calculate-clusters.R | 7 ++-- .../tests/testthat/test-calculate-clusters.R | 35 +++++++++++++++++++ 2 files changed, 38 insertions(+), 4 deletions(-) create mode 100644 packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R diff --git a/packages/rOpenScPCA/R/calculate-clusters.R b/packages/rOpenScPCA/R/calculate-clusters.R index 3b8a9a184..b4203deda 100644 --- a/packages/rOpenScPCA/R/calculate-clusters.R +++ b/packages/rOpenScPCA/R/calculate-clusters.R @@ -29,18 +29,17 @@ calculate_clusters <- function( # Check input arguments stopifnot( - "The `mat` argument must be a matrix." = class(mat) %in% c("Matrix", "dgCMatrix"), - "The `mat` matrix must be numeric." = type(mat) %in% c("numeric", "double"), + "The `mat` argument must be a matrix." = any(class(mat) %in% c("matrix", "dgCMatrix")), "The `mat` matrix must have row names representing cell ids (e.g. barcodes)." = !(is.null(rownames(mat))) ) - algorithm <- toLower(algorithm) + algorithm <- tolower(algorithm) stopifnot( "`algorithm` must be one of 'louvain' (default), 'walktrap' or 'leiden'." = algorithm %in% c("louvain", "walktrap", "leiden") ) - weighting <- toLower(weighting) + weighting <- tolower(weighting) stopifnot( "`weighting` must be one of 'jaccard' (default) or 'rank'." = weighting %in% c("jaccard", "rank", "leiden") diff --git a/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R b/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R new file mode 100644 index 000000000..13a587d31 --- /dev/null +++ b/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R @@ -0,0 +1,35 @@ +test_that("calculate_clusters runs with defaults", { + test_mat <- matrix( + runif(1000, 0.1, 10), + nrow = 100, + ncol = 10 + ) + + rownames(test_mat) <- 1:100 + cluster_df <- calculate_clusters(test_mat) + + + expect_equal( + names(cluster_df), + c("cell_id", "cluster", "algorithm", "weighting", "nn") + ) + expect_equal( + cluster_df$cell_id, + as.character(1:100) + ) + expect_true( + is.factor(cluster_df$cluster) + ) + expect_equal( + unique(cluster_df$algorithm), + "louvain" + ) + expect_equal( + unique(cluster_df$weighting), + "jaccard" + ) + expect_equal( + unique(cluster_df$nn), + 10 + ) +}) From 5eff75831b07099b6027bdc84c5df51189f8d85b Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Wed, 4 Sep 2024 08:37:55 -0400 Subject: [PATCH 04/41] add testthat file --- packages/rOpenScPCA/tests/testthat.R | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 packages/rOpenScPCA/tests/testthat.R diff --git a/packages/rOpenScPCA/tests/testthat.R b/packages/rOpenScPCA/tests/testthat.R new file mode 100644 index 000000000..5dc4da8e3 --- /dev/null +++ b/packages/rOpenScPCA/tests/testthat.R @@ -0,0 +1,12 @@ +# This file is part of the standard setup for testthat. +# It is recommended that you do not modify it. +# +# Where should you do additional test configuration? +# Learn more about the roles of various files in: +# * https://r-pkgs.org/testing-design.html#sec-tests-files-overview +# * https://testthat.r-lib.org/articles/special-files.html + +library(testthat) +library(rOpenScPCA) + +test_check("rOpenScPCA") From 381282682579a55ae64782226b44f340c06b6611 Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Wed, 4 Sep 2024 08:39:00 -0400 Subject: [PATCH 05/41] add tibble --- packages/rOpenScPCA/DESCRIPTION | 3 ++- packages/rOpenScPCA/man/calculate_clusters.Rd | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/rOpenScPCA/DESCRIPTION b/packages/rOpenScPCA/DESCRIPTION index 839d757e5..0893561ed 100644 --- a/packages/rOpenScPCA/DESCRIPTION +++ b/packages/rOpenScPCA/DESCRIPTION @@ -23,4 +23,5 @@ Suggests: Config/testthat/edition: 3 RoxygenNote: 7.3.2 Imports: - bluster + bluster, + tibble diff --git a/packages/rOpenScPCA/man/calculate_clusters.Rd b/packages/rOpenScPCA/man/calculate_clusters.Rd index 38cb178b8..d76097fff 100644 --- a/packages/rOpenScPCA/man/calculate_clusters.Rd +++ b/packages/rOpenScPCA/man/calculate_clusters.Rd @@ -40,6 +40,6 @@ Calculate graph-based clusters from a provided matrix } \examples{ \dontrun{ - # TODO WILL ADD EXAMPLES ONCE ARGS ARE FINALIZED +# TODO WILL ADD EXAMPLES ONCE ARGS ARE FINALIZED } } From 0305faaae709087163e23450ee2fe17392d2be29 Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Wed, 4 Sep 2024 08:47:51 -0400 Subject: [PATCH 06/41] some comments for future us --- packages/rOpenScPCA/R/calculate-clusters.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/rOpenScPCA/R/calculate-clusters.R b/packages/rOpenScPCA/R/calculate-clusters.R index b4203deda..caea7c899 100644 --- a/packages/rOpenScPCA/R/calculate-clusters.R +++ b/packages/rOpenScPCA/R/calculate-clusters.R @@ -9,7 +9,7 @@ #' @param random_seed Random seed to set for clustering. Default is 2024. #' @param cluster_args List of additional arguments to pass to the clustering function. #' -#' @return A data frame of cluster results with columns `cell_id` and `cluster`. Additional columns represent algorithm parameters. +#' @return A data frame of cluster results with columns `cell_id` and `cluster`. Additional columns (TODO!) represent algorithm parameters. #' @export #' #' @examples @@ -51,6 +51,7 @@ calculate_clusters <- function( ) # Update cluster_args list with settings that users can directly provide + # clusterRows throws an error if this list has a param not used by the chosen algorithm if (algorithm != "walktrap") { cluster_args$resolution <- resolution } @@ -72,7 +73,7 @@ calculate_clusters <- function( # Transform results into a table and return - # TODO: Should this have _all_ parameters in cluster.args? + # TODO: Should this have _all_ (non-default/user-specified) parameters in cluster.args? return( tibble::tibble( cell_id = rownames(mat), From f6b736d012669327d9070bdd92d5acaa79651322 Mon Sep 17 00:00:00 2001 From: Stephanie Spielman Date: Mon, 9 Sep 2024 13:01:00 -0400 Subject: [PATCH 07/41] Apply suggestions from code review Co-authored-by: Joshua Shapiro --- packages/rOpenScPCA/R/calculate-clusters.R | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/packages/rOpenScPCA/R/calculate-clusters.R b/packages/rOpenScPCA/R/calculate-clusters.R index caea7c899..f92fe6759 100644 --- a/packages/rOpenScPCA/R/calculate-clusters.R +++ b/packages/rOpenScPCA/R/calculate-clusters.R @@ -23,13 +23,16 @@ calculate_clusters <- function( nn = 10, resolution = 1, # leiden and louvain objective_function = "CPM", # leiden only - random_seed = 2024, - cluster_args = list()) { - set.seed(random_seed) + cluster_args = list(), + seed = NULL +) { + if (!is.null(seed)) { + set.seed(seed) + } # Check input arguments stopifnot( - "The `mat` argument must be a matrix." = any(class(mat) %in% c("matrix", "dgCMatrix")), + "The `mat` argument must be a matrix." = any(class(mat) %in% c("matrix", "Matrix")), "The `mat` matrix must have row names representing cell ids (e.g. barcodes)." = !(is.null(rownames(mat))) ) @@ -47,16 +50,17 @@ calculate_clusters <- function( # TODO: consider adding more specific checks here? stopifnot( - "`cluster_args` must be a list." = class(cluster_args) == "list" + "`cluster_args` must be a list." = is.list(cluster_args)" ) # Update cluster_args list with settings that users can directly provide # clusterRows throws an error if this list has a param not used by the chosen algorithm - if (algorithm != "walktrap") { + if (algorithm == "louvain") { cluster_args$resolution <- resolution } if (algorithm == "leiden") { - cluster_args$resolution <- objective_function + cluster_args$resolution_parameter <- resolution + cluster_args$objective_function <- objective_function } From 5b4832189f2f0427e37e97f9b19b09ccc12f1d69 Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Mon, 9 Sep 2024 13:02:17 -0400 Subject: [PATCH 08/41] rm .gitkeep --- packages/rOpenScPCA/R/.gitkeep | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 packages/rOpenScPCA/R/.gitkeep diff --git a/packages/rOpenScPCA/R/.gitkeep b/packages/rOpenScPCA/R/.gitkeep deleted file mode 100644 index e69de29bb..000000000 From 29c7868d197b2bca521f19ab0267e9a1c80c4e61 Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Mon, 9 Sep 2024 13:03:04 -0400 Subject: [PATCH 09/41] fix quote --- packages/rOpenScPCA/R/calculate-clusters.R | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/packages/rOpenScPCA/R/calculate-clusters.R b/packages/rOpenScPCA/R/calculate-clusters.R index f92fe6759..05565abb9 100644 --- a/packages/rOpenScPCA/R/calculate-clusters.R +++ b/packages/rOpenScPCA/R/calculate-clusters.R @@ -24,8 +24,7 @@ calculate_clusters <- function( resolution = 1, # leiden and louvain objective_function = "CPM", # leiden only cluster_args = list(), - seed = NULL -) { + seed = NULL) { if (!is.null(seed)) { set.seed(seed) } @@ -50,7 +49,7 @@ calculate_clusters <- function( # TODO: consider adding more specific checks here? stopifnot( - "`cluster_args` must be a list." = is.list(cluster_args)" + "`cluster_args` must be a list." = is.list(cluster_args) ) # Update cluster_args list with settings that users can directly provide From 15fea48ec7c9f28da63ed3a058cff8a8bc4ee915 Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Mon, 9 Sep 2024 13:05:06 -0400 Subject: [PATCH 10/41] reverting since resolution_parameter has been superceded --- packages/rOpenScPCA/R/calculate-clusters.R | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/packages/rOpenScPCA/R/calculate-clusters.R b/packages/rOpenScPCA/R/calculate-clusters.R index 05565abb9..a8f7ffc7c 100644 --- a/packages/rOpenScPCA/R/calculate-clusters.R +++ b/packages/rOpenScPCA/R/calculate-clusters.R @@ -21,7 +21,7 @@ calculate_clusters <- function( algorithm = "louvain", weighting = "jaccard", nn = 10, - resolution = 1, # leiden and louvain + resolution = 1, # louvain or leiden objective_function = "CPM", # leiden only cluster_args = list(), seed = NULL) { @@ -54,11 +54,10 @@ calculate_clusters <- function( # Update cluster_args list with settings that users can directly provide # clusterRows throws an error if this list has a param not used by the chosen algorithm - if (algorithm == "louvain") { + if (algorithm != "walktrap") { cluster_args$resolution <- resolution } if (algorithm == "leiden") { - cluster_args$resolution_parameter <- resolution cluster_args$objective_function <- objective_function } From b168a01fe4c0c059c501ba596cb17dfb7d7182b7 Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Mon, 9 Sep 2024 13:07:31 -0400 Subject: [PATCH 11/41] use match.args for algorithm and weighting --- packages/rOpenScPCA/R/calculate-clusters.R | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/packages/rOpenScPCA/R/calculate-clusters.R b/packages/rOpenScPCA/R/calculate-clusters.R index a8f7ffc7c..5daece8f9 100644 --- a/packages/rOpenScPCA/R/calculate-clusters.R +++ b/packages/rOpenScPCA/R/calculate-clusters.R @@ -18,8 +18,8 @@ #' } calculate_clusters <- function( mat, - algorithm = "louvain", - weighting = "jaccard", + algorithm = c("louvain", "walktrap", "leiden"), + weighting = c("jaccard", "rank"), nn = 10, resolution = 1, # louvain or leiden objective_function = "CPM", # leiden only @@ -35,16 +35,17 @@ calculate_clusters <- function( "The `mat` matrix must have row names representing cell ids (e.g. barcodes)." = !(is.null(rownames(mat))) ) - algorithm <- tolower(algorithm) + algorithm <- match.arg(algorithm) + print(algorithm) stopifnot( "`algorithm` must be one of 'louvain' (default), 'walktrap' or 'leiden'." = algorithm %in% c("louvain", "walktrap", "leiden") ) - weighting <- tolower(weighting) + weighting <- match.arg(weighting) stopifnot( "`weighting` must be one of 'jaccard' (default) or 'rank'." = - weighting %in% c("jaccard", "rank", "leiden") + weighting %in% c("jaccard", "rank") ) # TODO: consider adding more specific checks here? From 0a433b8f808bc4dc1754e55dfc795cd525309570 Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Mon, 9 Sep 2024 13:11:21 -0400 Subject: [PATCH 12/41] use data.frame and update docs --- packages/rOpenScPCA/R/calculate-clusters.R | 4 ++-- packages/rOpenScPCA/man/calculate_clusters.Rd | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/packages/rOpenScPCA/R/calculate-clusters.R b/packages/rOpenScPCA/R/calculate-clusters.R index 5daece8f9..788568fc4 100644 --- a/packages/rOpenScPCA/R/calculate-clusters.R +++ b/packages/rOpenScPCA/R/calculate-clusters.R @@ -6,7 +6,7 @@ #' @param nn Number of nearest neighbors. Default is 10. #' @param resolution Resolution parameter used by louvain and leiden clustering only. Default is 1. #' @param objective_function Leiden-specific parameter for whether to use the Constant Potts Model ("CPM"; default) or "modularity" -#' @param random_seed Random seed to set for clustering. Default is 2024. +#' @param seed Random seed to set for clustering. Default is 2024. #' @param cluster_args List of additional arguments to pass to the clustering function. #' #' @return A data frame of cluster results with columns `cell_id` and `cluster`. Additional columns (TODO!) represent algorithm parameters. @@ -78,7 +78,7 @@ calculate_clusters <- function( # Transform results into a table and return # TODO: Should this have _all_ (non-default/user-specified) parameters in cluster.args? return( - tibble::tibble( + data.frame( cell_id = rownames(mat), cluster = clusters, algorithm = algorithm, diff --git a/packages/rOpenScPCA/man/calculate_clusters.Rd b/packages/rOpenScPCA/man/calculate_clusters.Rd index d76097fff..dbe53edaf 100644 --- a/packages/rOpenScPCA/man/calculate_clusters.Rd +++ b/packages/rOpenScPCA/man/calculate_clusters.Rd @@ -6,13 +6,13 @@ \usage{ calculate_clusters( mat, - algorithm = "louvain", - weighting = "jaccard", + algorithm = c("louvain", "walktrap", "leiden"), + weighting = c("jaccard", "rank"), nn = 10, resolution = 1, objective_function = "CPM", - random_seed = 2024, - cluster_args = list() + cluster_args = list(), + seed = NULL ) } \arguments{ @@ -28,12 +28,12 @@ calculate_clusters( \item{objective_function}{Leiden-specific parameter for whether to use the Constant Potts Model ("CPM"; default) or "modularity"} -\item{random_seed}{Random seed to set for clustering. Default is 2024.} - \item{cluster_args}{List of additional arguments to pass to the clustering function.} + +\item{seed}{Random seed to set for clustering. Default is 2024.} } \value{ -A data frame of cluster results with columns `cell_id` and `cluster`. Additional columns represent algorithm parameters. +A data frame of cluster results with columns `cell_id` and `cluster`. Additional columns (TODO!) represent algorithm parameters. } \description{ Calculate graph-based clusters from a provided matrix From 288842ace2982a1d96bf68a98f808ce50420dc99 Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Mon, 9 Sep 2024 13:12:12 -0400 Subject: [PATCH 13/41] rm tibble dependency --- packages/rOpenScPCA/DESCRIPTION | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/packages/rOpenScPCA/DESCRIPTION b/packages/rOpenScPCA/DESCRIPTION index 0893561ed..839d757e5 100644 --- a/packages/rOpenScPCA/DESCRIPTION +++ b/packages/rOpenScPCA/DESCRIPTION @@ -23,5 +23,4 @@ Suggests: Config/testthat/edition: 3 RoxygenNote: 7.3.2 Imports: - bluster, - tibble + bluster From ed93f8593980dde20646d86f4219cdb284e3edae Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Mon, 9 Sep 2024 16:07:40 -0400 Subject: [PATCH 14/41] return data frame with all parameters and update docs. update cluster_args check --- packages/rOpenScPCA/R/calculate-clusters.R | 38 ++++++++++--------- packages/rOpenScPCA/man/calculate_clusters.Rd | 4 +- 2 files changed, 24 insertions(+), 18 deletions(-) diff --git a/packages/rOpenScPCA/R/calculate-clusters.R b/packages/rOpenScPCA/R/calculate-clusters.R index 788568fc4..42ddfd4ea 100644 --- a/packages/rOpenScPCA/R/calculate-clusters.R +++ b/packages/rOpenScPCA/R/calculate-clusters.R @@ -9,7 +9,9 @@ #' @param seed Random seed to set for clustering. Default is 2024. #' @param cluster_args List of additional arguments to pass to the clustering function. #' -#' @return A data frame of cluster results with columns `cell_id` and `cluster`. Additional columns (TODO!) represent algorithm parameters. +#' @return A data frame of cluster results with columns `cell_id` and `cluster`. Additional columns represent algorithm parameters +#' and include at least: `algorithm`, `weighting`, and `nn`. Louvain and leiden clustering will also include `resolution`, and +#' leiden clustering will further include `objective_function`. Any other values passed to `cluster_args` will also be included. #' @export #' #' @examples @@ -36,7 +38,6 @@ calculate_clusters <- function( ) algorithm <- match.arg(algorithm) - print(algorithm) stopifnot( "`algorithm` must be one of 'louvain' (default), 'walktrap' or 'leiden'." = algorithm %in% c("louvain", "walktrap", "leiden") @@ -48,13 +49,14 @@ calculate_clusters <- function( weighting %in% c("jaccard", "rank") ) - # TODO: consider adding more specific checks here? - stopifnot( - "`cluster_args` must be a list." = is.list(cluster_args) - ) + if (length(cluster_args) != 0) { + stopifnot( + "`cluster_args` must be a named list." = is.list(cluster_args) && !(is.null(names(cluster_args))) + ) + } - # Update cluster_args list with settings that users can directly provide - # clusterRows throws an error if this list has a param not used by the chosen algorithm + # Update cluster_args list with parameters that users can directly provide + # note that clusterRows throws an error if this list has a param not used by the chosen algorithm if (algorithm != "walktrap") { cluster_args$resolution <- resolution } @@ -76,14 +78,16 @@ calculate_clusters <- function( # Transform results into a table and return - # TODO: Should this have _all_ (non-default/user-specified) parameters in cluster.args? - return( - data.frame( - cell_id = rownames(mat), - cluster = clusters, - algorithm = algorithm, - weighting = weighting, - nn = nn + cluster_df <- data.frame( + cell_id = rownames(mat), + cluster = clusters, + algorithm = algorithm, + weighting = weighting, + nn = nn + ) |> + dplyr::bind_cols( + dplyr::bind_rows(cluster_args) ) - ) + + return(cluster_df) } diff --git a/packages/rOpenScPCA/man/calculate_clusters.Rd b/packages/rOpenScPCA/man/calculate_clusters.Rd index dbe53edaf..4f6de42ba 100644 --- a/packages/rOpenScPCA/man/calculate_clusters.Rd +++ b/packages/rOpenScPCA/man/calculate_clusters.Rd @@ -33,7 +33,9 @@ calculate_clusters( \item{seed}{Random seed to set for clustering. Default is 2024.} } \value{ -A data frame of cluster results with columns `cell_id` and `cluster`. Additional columns (TODO!) represent algorithm parameters. +A data frame of cluster results with columns `cell_id` and `cluster`. Additional columns represent algorithm parameters + and include at least: `algorithm`, `weighting`, and `nn`. Louvain and leiden clustering will also include `resolution`, and + leiden clustering will further include `objective_function`. Any other values passed to `cluster_args` will also be included. } \description{ Calculate graph-based clusters from a provided matrix From d7abb398331afbe498e54ed85d2982220a1b4198 Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Mon, 9 Sep 2024 16:08:04 -0400 Subject: [PATCH 15/41] update test for all df columns --- .../tests/testthat/test-calculate-clusters.R | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R b/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R index 13a587d31..8f56857bf 100644 --- a/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R +++ b/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R @@ -11,7 +11,7 @@ test_that("calculate_clusters runs with defaults", { expect_equal( names(cluster_df), - c("cell_id", "cluster", "algorithm", "weighting", "nn") + c("cell_id", "cluster", "algorithm", "weighting", "nn", "resolution") ) expect_equal( cluster_df$cell_id, @@ -20,16 +20,16 @@ test_that("calculate_clusters runs with defaults", { expect_true( is.factor(cluster_df$cluster) ) - expect_equal( - unique(cluster_df$algorithm), - "louvain" + expect_true( + all(cluster_df$algorithm == "louvain"), ) - expect_equal( - unique(cluster_df$weighting), - "jaccard" + expect_true( + all(cluster_df$weighting == "jaccard") ) - expect_equal( - unique(cluster_df$nn), - 10 + expect_true( + all(cluster_df$nn == 10) + ) + expect_true( + all(cluster_df$resolution == 1) ) }) From e4bf3de1590cdef89d9615178306b02873c77177 Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Mon, 9 Sep 2024 16:33:18 -0400 Subject: [PATCH 16/41] dont need to handle errors with match.args --- packages/rOpenScPCA/R/calculate-clusters.R | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/packages/rOpenScPCA/R/calculate-clusters.R b/packages/rOpenScPCA/R/calculate-clusters.R index 42ddfd4ea..8137365ec 100644 --- a/packages/rOpenScPCA/R/calculate-clusters.R +++ b/packages/rOpenScPCA/R/calculate-clusters.R @@ -24,7 +24,7 @@ calculate_clusters <- function( weighting = c("jaccard", "rank"), nn = 10, resolution = 1, # louvain or leiden - objective_function = "CPM", # leiden only + objective_function = c("CPM", "modularity"), # leiden only cluster_args = list(), seed = NULL) { if (!is.null(seed)) { @@ -38,15 +38,12 @@ calculate_clusters <- function( ) algorithm <- match.arg(algorithm) - stopifnot( - "`algorithm` must be one of 'louvain' (default), 'walktrap' or 'leiden'." = - algorithm %in% c("louvain", "walktrap", "leiden") - ) - weighting <- match.arg(weighting) + objective_function <- match.arg(objective_function) + stopifnot( - "`weighting` must be one of 'jaccard' (default) or 'rank'." = - weighting %in% c("jaccard", "rank") + "`resolution` must be numeric" = is.numeric(resolution), + "`nn` must be numeric" = is.numeric(nn) ) if (length(cluster_args) != 0) { From 3498350678c4ea1fe3b5b43ae726d246e97b1ee9 Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Mon, 9 Sep 2024 16:33:52 -0400 Subject: [PATCH 17/41] add tests to handle bad args, but not match.args ones --- .../tests/testthat/test-calculate-clusters.R | 39 +++++++++++++++---- 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R b/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R index 8f56857bf..920618786 100644 --- a/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R +++ b/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R @@ -1,14 +1,13 @@ -test_that("calculate_clusters runs with defaults", { - test_mat <- matrix( - runif(1000, 0.1, 10), - nrow = 100, - ncol = 10 - ) +test_mat <- matrix( + runif(1000, 0.1, 10), + nrow = 100, + ncol = 10 +) +rownames(test_mat) <- as.character(1:100) - rownames(test_mat) <- 1:100 +test_that("calculate_clusters runs with defaults", { cluster_df <- calculate_clusters(test_mat) - expect_equal( names(cluster_df), c("cell_id", "cluster", "algorithm", "weighting", "nn", "resolution") @@ -33,3 +32,27 @@ test_that("calculate_clusters runs with defaults", { all(cluster_df$resolution == 1) ) }) + + +test_that("calculate_clusters errors as expected", { + test_mat_nonames <- test_mat + rownames(test_mat_nonames) <- NULL + + expect_error( + calculate_clusters(test_mat_nonames), + "The `mat` matrix must have row names representing cell ids e.g. barcodes." + ) + expect_error( + calculate_clusters("not a matrix"), + "The `mat` argument must be a matrix." + ) + + expect_error( + calculate_clusters(test_mat, resolution = "string"), + "`resolution` must be numeric" + ) + expect_error( + calculate_clusters(test_mat, nn = "string"), + "`nn` must be numeric" + ) +}) From 9db71daa550a4e165dd892791bc8e755ed125354 Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Mon, 9 Sep 2024 16:42:57 -0400 Subject: [PATCH 18/41] add examples --- packages/rOpenScPCA/R/calculate-clusters.R | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/packages/rOpenScPCA/R/calculate-clusters.R b/packages/rOpenScPCA/R/calculate-clusters.R index 8137365ec..3e44e6e07 100644 --- a/packages/rOpenScPCA/R/calculate-clusters.R +++ b/packages/rOpenScPCA/R/calculate-clusters.R @@ -7,7 +7,7 @@ #' @param resolution Resolution parameter used by louvain and leiden clustering only. Default is 1. #' @param objective_function Leiden-specific parameter for whether to use the Constant Potts Model ("CPM"; default) or "modularity" #' @param seed Random seed to set for clustering. Default is 2024. -#' @param cluster_args List of additional arguments to pass to the clustering function. +#' @param cluster_args List of additional arguments to pass to the chosen clustering function. See igraph documentation for details: https://igraph.org/r/html/latest #' #' @return A data frame of cluster results with columns `cell_id` and `cluster`. Additional columns represent algorithm parameters #' and include at least: `algorithm`, `weighting`, and `nn`. Louvain and leiden clustering will also include `resolution`, and @@ -16,7 +16,18 @@ #' #' @examples #' \dontrun{ -#' # TODO WILL ADD EXAMPLES ONCE ARGS ARE FINALIZED +#' # cluster using default parameters +#' cluster_df <- calculate_clusters(pca_matrix) +#' +#' # cluster using the leiden algorithm with a resolution of 0.1 +#' cluster_df <- calculate_clusters(pca_matrix, algorithm = "leiden", resolution = 0.1) +#' +#' # cluster using the leiden algorithm with a non-default of 3 iterations +#' cluster_df <- calculate_clusters( +#' pca_matrix, +#' algorithm = "leiden", +#' cluster_args = list(n_iterations = 3) +#' ) #' } calculate_clusters <- function( mat, From d576509791a27faa8a78f5e12c2af1fd04c114a1 Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Mon, 9 Sep 2024 16:43:06 -0400 Subject: [PATCH 19/41] rerun document --- packages/rOpenScPCA/man/calculate_clusters.Rd | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/packages/rOpenScPCA/man/calculate_clusters.Rd b/packages/rOpenScPCA/man/calculate_clusters.Rd index 4f6de42ba..c313d514e 100644 --- a/packages/rOpenScPCA/man/calculate_clusters.Rd +++ b/packages/rOpenScPCA/man/calculate_clusters.Rd @@ -10,7 +10,7 @@ calculate_clusters( weighting = c("jaccard", "rank"), nn = 10, resolution = 1, - objective_function = "CPM", + objective_function = c("CPM", "modularity"), cluster_args = list(), seed = NULL ) @@ -28,7 +28,7 @@ calculate_clusters( \item{objective_function}{Leiden-specific parameter for whether to use the Constant Potts Model ("CPM"; default) or "modularity"} -\item{cluster_args}{List of additional arguments to pass to the clustering function.} +\item{cluster_args}{List of additional arguments to pass to the chosen clustering function. See igraph documentation for details: https://igraph.org/r/html/latest} \item{seed}{Random seed to set for clustering. Default is 2024.} } @@ -42,6 +42,18 @@ Calculate graph-based clusters from a provided matrix } \examples{ \dontrun{ -# TODO WILL ADD EXAMPLES ONCE ARGS ARE FINALIZED +# cluster using default parameters +cluster_df <- calculate_clusters(pca_matrix) + +# cluster using the leiden algorithm with a resolution of 0.1 +cluster_df <- calculate_clusters(pca_matrix, algorithm = "leiden", resolution = 0.1) + +# cluster using the leiden algorithm with a non-default of 3 iterations +cluster_df <- calculate_clusters( + pca_matrix, + algorithm = "leiden", + cluster_args = list(n_iterations = 3) +) + } } From 52fd2d107c11026d00f4a9f1061c57fb5a711b77 Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Mon, 9 Sep 2024 16:43:36 -0400 Subject: [PATCH 20/41] add tests for additional args --- .../tests/testthat/test-calculate-clusters.R | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R b/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R index 920618786..9a9c3082b 100644 --- a/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R +++ b/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R @@ -20,7 +20,7 @@ test_that("calculate_clusters runs with defaults", { is.factor(cluster_df$cluster) ) expect_true( - all(cluster_df$algorithm == "louvain"), + all(cluster_df$algorithm == "louvain") ) expect_true( all(cluster_df$weighting == "jaccard") @@ -34,6 +34,26 @@ test_that("calculate_clusters runs with defaults", { }) +test_that("calculate_clusters runs with additional cluster_args", { + cluster_df <- calculate_clusters( + test_mat, + algorithm = "leiden", + # the default is 2 + cluster_args = list(n_iterations = 3) + ) + + expect_setequal( + names(cluster_df), + c("cell_id", "cluster", "algorithm", "weighting", "nn", "resolution", "objective_function", "n_iterations") + ) + expect_true( + all(cluster_df$n_iterations == 3) + ) +}) + + + + test_that("calculate_clusters errors as expected", { test_mat_nonames <- test_mat rownames(test_mat_nonames) <- NULL From c11cedc445e368a3400bd060d49f604afabebb47 Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Mon, 9 Sep 2024 16:46:14 -0400 Subject: [PATCH 21/41] add dplyr dep --- packages/rOpenScPCA/DESCRIPTION | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/rOpenScPCA/DESCRIPTION b/packages/rOpenScPCA/DESCRIPTION index 839d757e5..9da548377 100644 --- a/packages/rOpenScPCA/DESCRIPTION +++ b/packages/rOpenScPCA/DESCRIPTION @@ -23,4 +23,5 @@ Suggests: Config/testthat/edition: 3 RoxygenNote: 7.3.2 Imports: - bluster + bluster, + dplyr From e35bdf6f316916241a2f37afc9a7d13bb291aa92 Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Mon, 9 Sep 2024 16:46:38 -0400 Subject: [PATCH 22/41] update error --- packages/rOpenScPCA/R/calculate-clusters.R | 2 +- packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/rOpenScPCA/R/calculate-clusters.R b/packages/rOpenScPCA/R/calculate-clusters.R index 3e44e6e07..9e16ec356 100644 --- a/packages/rOpenScPCA/R/calculate-clusters.R +++ b/packages/rOpenScPCA/R/calculate-clusters.R @@ -45,7 +45,7 @@ calculate_clusters <- function( # Check input arguments stopifnot( "The `mat` argument must be a matrix." = any(class(mat) %in% c("matrix", "Matrix")), - "The `mat` matrix must have row names representing cell ids (e.g. barcodes)." = !(is.null(rownames(mat))) + "The `mat` matrix must have row names representing cell ids, e.g. barcodes." = !(is.null(rownames(mat))) ) algorithm <- match.arg(algorithm) diff --git a/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R b/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R index 9a9c3082b..e6bc4b87b 100644 --- a/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R +++ b/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R @@ -60,7 +60,7 @@ test_that("calculate_clusters errors as expected", { expect_error( calculate_clusters(test_mat_nonames), - "The `mat` matrix must have row names representing cell ids e.g. barcodes." + "The `mat` matrix must have row names representing cell ids, e.g. barcodes." ) expect_error( calculate_clusters("not a matrix"), From b724ebb3725b740b16d346ab26b1f281244135af Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Mon, 9 Sep 2024 16:46:51 -0400 Subject: [PATCH 23/41] document --- packages/rOpenScPCA/man/calculate_clusters.Rd | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/packages/rOpenScPCA/man/calculate_clusters.Rd b/packages/rOpenScPCA/man/calculate_clusters.Rd index c313d514e..f86960b10 100644 --- a/packages/rOpenScPCA/man/calculate_clusters.Rd +++ b/packages/rOpenScPCA/man/calculate_clusters.Rd @@ -50,10 +50,9 @@ cluster_df <- calculate_clusters(pca_matrix, algorithm = "leiden", resolution = # cluster using the leiden algorithm with a non-default of 3 iterations cluster_df <- calculate_clusters( - pca_matrix, - algorithm = "leiden", - cluster_args = list(n_iterations = 3) + pca_matrix, + algorithm = "leiden", + cluster_args = list(n_iterations = 3) ) - } } From 4d1a91f34e6ecddda533b7c580c17144a274a829 Mon Sep 17 00:00:00 2001 From: Stephanie Spielman Date: Tue, 10 Sep 2024 11:21:00 -0400 Subject: [PATCH 24/41] Apply suggestions from code review Co-authored-by: Joshua Shapiro --- packages/rOpenScPCA/R/calculate-clusters.R | 6 +++--- .../rOpenScPCA/tests/testthat/test-calculate-clusters.R | 6 ++---- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/packages/rOpenScPCA/R/calculate-clusters.R b/packages/rOpenScPCA/R/calculate-clusters.R index 9e16ec356..bf72bce15 100644 --- a/packages/rOpenScPCA/R/calculate-clusters.R +++ b/packages/rOpenScPCA/R/calculate-clusters.R @@ -45,7 +45,7 @@ calculate_clusters <- function( # Check input arguments stopifnot( "The `mat` argument must be a matrix." = any(class(mat) %in% c("matrix", "Matrix")), - "The `mat` matrix must have row names representing cell ids, e.g. barcodes." = !(is.null(rownames(mat))) + "The `mat` matrix must have row names representing cell ids, e.g. barcodes." = is.character(rownames(mat)) ) algorithm <- match.arg(algorithm) @@ -57,7 +57,7 @@ calculate_clusters <- function( "`nn` must be numeric" = is.numeric(nn) ) - if (length(cluster_args) != 0) { + if (length(cluster_args)) { stopifnot( "`cluster_args` must be a named list." = is.list(cluster_args) && !(is.null(names(cluster_args))) ) @@ -65,7 +65,7 @@ calculate_clusters <- function( # Update cluster_args list with parameters that users can directly provide # note that clusterRows throws an error if this list has a param not used by the chosen algorithm - if (algorithm != "walktrap") { + if (algorithm %in% c("louvain", "leiden")) { cluster_args$resolution <- resolution } if (algorithm == "leiden") { diff --git a/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R b/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R index e6bc4b87b..dc0df73a7 100644 --- a/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R +++ b/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R @@ -1,5 +1,5 @@ test_mat <- matrix( - runif(1000, 0.1, 10), + runif(1000, -3, 3), nrow = 100, ncol = 10 ) @@ -16,9 +16,7 @@ test_that("calculate_clusters runs with defaults", { cluster_df$cell_id, as.character(1:100) ) - expect_true( - is.factor(cluster_df$cluster) - ) + expect_type(cluster_df$cluster, "factor") expect_true( all(cluster_df$algorithm == "louvain") ) From 4daa7e51b453b852adfd09a99c5260c18d94abce Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Tue, 10 Sep 2024 11:30:50 -0400 Subject: [PATCH 25/41] remove .gitkeep again, got restored with the main merge --- packages/rOpenScPCA/R/.gitkeep | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 packages/rOpenScPCA/R/.gitkeep diff --git a/packages/rOpenScPCA/R/.gitkeep b/packages/rOpenScPCA/R/.gitkeep deleted file mode 100644 index e69de29bb..000000000 From 835b7eecff54c2fb273088c451f7aa451090f610 Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Tue, 10 Sep 2024 11:35:59 -0400 Subject: [PATCH 26/41] expect_s3_class --- packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R b/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R index dc0df73a7..6a6796cd8 100644 --- a/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R +++ b/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R @@ -16,7 +16,7 @@ test_that("calculate_clusters runs with defaults", { cluster_df$cell_id, as.character(1:100) ) - expect_type(cluster_df$cluster, "factor") + expect_s3_class(cluster_df$cluster, "factor") expect_true( all(cluster_df$algorithm == "louvain") ) From d8f5b311676d5ded0ed94f2c41cacce29ba45f72 Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Tue, 10 Sep 2024 11:36:38 -0400 Subject: [PATCH 27/41] combine stopifnots and update to allNames --- packages/rOpenScPCA/R/calculate-clusters.R | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/packages/rOpenScPCA/R/calculate-clusters.R b/packages/rOpenScPCA/R/calculate-clusters.R index bf72bce15..89bcb5637 100644 --- a/packages/rOpenScPCA/R/calculate-clusters.R +++ b/packages/rOpenScPCA/R/calculate-clusters.R @@ -45,21 +45,18 @@ calculate_clusters <- function( # Check input arguments stopifnot( "The `mat` argument must be a matrix." = any(class(mat) %in% c("matrix", "Matrix")), - "The `mat` matrix must have row names representing cell ids, e.g. barcodes." = is.character(rownames(mat)) + "The `mat` matrix must have row names representing cell ids, e.g. barcodes." = is.character(rownames(mat)), + "`resolution` must be numeric" = is.numeric(resolution), + "`nn` must be numeric" = is.numeric(nn) ) algorithm <- match.arg(algorithm) weighting <- match.arg(weighting) objective_function <- match.arg(objective_function) - stopifnot( - "`resolution` must be numeric" = is.numeric(resolution), - "`nn` must be numeric" = is.numeric(nn) - ) - if (length(cluster_args)) { stopifnot( - "`cluster_args` must be a named list." = is.list(cluster_args) && !(is.null(names(cluster_args))) + "`cluster_args` must be a named list." = is.list(cluster_args) && !("" %in% allNames(cluster_args)) ) } From 329180d6c634bbdff7f54781bc911673f6ab48e2 Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Tue, 10 Sep 2024 11:37:25 -0400 Subject: [PATCH 28/41] dont check the specific error --- .../tests/testthat/test-calculate-clusters.R | 21 ++++--------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R b/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R index 6a6796cd8..a0f3d6bb1 100644 --- a/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R +++ b/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R @@ -56,21 +56,8 @@ test_that("calculate_clusters errors as expected", { test_mat_nonames <- test_mat rownames(test_mat_nonames) <- NULL - expect_error( - calculate_clusters(test_mat_nonames), - "The `mat` matrix must have row names representing cell ids, e.g. barcodes." - ) - expect_error( - calculate_clusters("not a matrix"), - "The `mat` argument must be a matrix." - ) - - expect_error( - calculate_clusters(test_mat, resolution = "string"), - "`resolution` must be numeric" - ) - expect_error( - calculate_clusters(test_mat, nn = "string"), - "`nn` must be numeric" - ) + expect_error(calculate_clusters(test_mat_nonames)) + expect_error(calculate_clusters("not a matrix")) + expect_error(calculate_clusters(test_mat, resolution = "string")) + expect_error(calculate_clusters(test_mat, nn = "string")) }) From 37d4973faa30aec4b55e4bcdef1def21419f181b Mon Sep 17 00:00:00 2001 From: Stephanie Spielman Date: Tue, 10 Sep 2024 16:04:17 -0400 Subject: [PATCH 29/41] use expect_equal instead --- .../tests/testthat/test-calculate-clusters.R | 32 ++++++++++++------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R b/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R index a0f3d6bb1..d502399aa 100644 --- a/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R +++ b/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R @@ -16,18 +16,27 @@ test_that("calculate_clusters runs with defaults", { cluster_df$cell_id, as.character(1:100) ) - expect_s3_class(cluster_df$cluster, "factor") - expect_true( - all(cluster_df$algorithm == "louvain") + + expect_s3_class( + cluster_df$cluster, + "factor" + ) + + expect_equal( + unique(cluster_df$algorithm), + "louvain" ) - expect_true( - all(cluster_df$weighting == "jaccard") + expect_equal( + unique(cluster_df$weighting), + "jaccard" ) - expect_true( - all(cluster_df$nn == 10) + expect_equal( + unique(cluster_df$nn), + 10 ) - expect_true( - all(cluster_df$resolution == 1) + expect_equal( + unique(cluster_df$resolution), + 1 ) }) @@ -44,8 +53,9 @@ test_that("calculate_clusters runs with additional cluster_args", { names(cluster_df), c("cell_id", "cluster", "algorithm", "weighting", "nn", "resolution", "objective_function", "n_iterations") ) - expect_true( - all(cluster_df$n_iterations == 3) + expect_equal( + unique(cluster_df$n_iterations), + 3 ) }) From 662c33f3b8289e7210b058a420a7aa1df95f3730 Mon Sep 17 00:00:00 2001 From: Stephanie Spielman Date: Tue, 10 Sep 2024 16:11:24 -0400 Subject: [PATCH 30/41] rm comment --- packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R b/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R index d502399aa..251612d0d 100644 --- a/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R +++ b/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R @@ -45,7 +45,6 @@ test_that("calculate_clusters runs with additional cluster_args", { cluster_df <- calculate_clusters( test_mat, algorithm = "leiden", - # the default is 2 cluster_args = list(n_iterations = 3) ) From fe022d56bebf5d3a26661429bf93fe8f6d6dec46 Mon Sep 17 00:00:00 2001 From: Stephanie Spielman Date: Tue, 10 Sep 2024 16:24:47 -0400 Subject: [PATCH 31/41] expand docs for non-default args, and add number weighting --- packages/rOpenScPCA/R/calculate-clusters.R | 18 ++++++++++++------ packages/rOpenScPCA/man/calculate_clusters.Rd | 10 +++++++--- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/packages/rOpenScPCA/R/calculate-clusters.R b/packages/rOpenScPCA/R/calculate-clusters.R index 89bcb5637..ceb47e198 100644 --- a/packages/rOpenScPCA/R/calculate-clusters.R +++ b/packages/rOpenScPCA/R/calculate-clusters.R @@ -1,17 +1,22 @@ #' Calculate graph-based clusters from a provided matrix #' #' @param mat Matrix, usually of PCs, where each row is a cell. Matrix must have rownames of cell ids (e.g., barcodes) -#' @param algorithm Clustering algorithm to use. Must be one of "louvain" (default), "walktrap", or "leiden" -#' @param weighting Weighting scheme to use. Must be one of "jaccard" (default) or "rank" +#' @param algorithm Clustering algorithm to use. Must be one of "louvain" (default), "walktrap", or "leiden". +#' Be aware that the default of "louvain" is different from the {bluster} package default of "walktrap". This difference is +#' because louvain clustering is more commonly-used in scRNA-seq analysis. +#' @param weighting Weighting scheme to use. Must be one of "jaccard" (default), "rank", or "number" +#' Be aware that the default of "jaccard" is different from the {bluster} package default of "rank". +#' This difference is because jaccard weighting is more commonly-used in scRNA-seq analysis. #' @param nn Number of nearest neighbors. Default is 10. #' @param resolution Resolution parameter used by louvain and leiden clustering only. Default is 1. #' @param objective_function Leiden-specific parameter for whether to use the Constant Potts Model ("CPM"; default) or "modularity" #' @param seed Random seed to set for clustering. Default is 2024. -#' @param cluster_args List of additional arguments to pass to the chosen clustering function. See igraph documentation for details: https://igraph.org/r/html/latest -#' +#' @param cluster_args List of additional arguments to pass to the chosen clustering function. Only single-length values will be used. +#' See igraph documentation for details on each clustering function: https://igraph.org/r/html/latest #' @return A data frame of cluster results with columns `cell_id` and `cluster`. Additional columns represent algorithm parameters #' and include at least: `algorithm`, `weighting`, and `nn`. Louvain and leiden clustering will also include `resolution`, and -#' leiden clustering will further include `objective_function`. Any other values passed to `cluster_args` will also be included. +#' leiden clustering will further include `objective_function`. +#' #' @export #' #' @examples @@ -32,7 +37,7 @@ calculate_clusters <- function( mat, algorithm = c("louvain", "walktrap", "leiden"), - weighting = c("jaccard", "rank"), + weighting = c("jaccard", "rank", "number"), nn = 10, resolution = 1, # louvain or leiden objective_function = c("CPM", "modularity"), # leiden only @@ -60,6 +65,7 @@ calculate_clusters <- function( ) } + # Update cluster_args list with parameters that users can directly provide # note that clusterRows throws an error if this list has a param not used by the chosen algorithm if (algorithm %in% c("louvain", "leiden")) { diff --git a/packages/rOpenScPCA/man/calculate_clusters.Rd b/packages/rOpenScPCA/man/calculate_clusters.Rd index f86960b10..b94cb48a3 100644 --- a/packages/rOpenScPCA/man/calculate_clusters.Rd +++ b/packages/rOpenScPCA/man/calculate_clusters.Rd @@ -7,7 +7,7 @@ calculate_clusters( mat, algorithm = c("louvain", "walktrap", "leiden"), - weighting = c("jaccard", "rank"), + weighting = c("jaccard", "rank", "number"), nn = 10, resolution = 1, objective_function = c("CPM", "modularity"), @@ -18,9 +18,13 @@ calculate_clusters( \arguments{ \item{mat}{Matrix, usually of PCs, where each row is a cell. Matrix must have rownames of cell ids (e.g., barcodes)} -\item{algorithm}{Clustering algorithm to use. Must be one of "louvain" (default), "walktrap", or "leiden"} +\item{algorithm}{Clustering algorithm to use. Must be one of "louvain" (default), "walktrap", or "leiden". +Be aware that the default of "louvain" is different from the {bluster} package default of "walktrap". This difference is +because louvain clustering is more commonly-used in scRNA-seq analysis.} -\item{weighting}{Weighting scheme to use. Must be one of "jaccard" (default) or "rank"} +\item{weighting}{Weighting scheme to use. Must be one of "jaccard" (default), "rank", or "number" +Be aware that the default of "jaccard" is different from the {bluster} package default of "rank". +This difference is because jaccard weighting is more commonly-used in scRNA-seq analysis.} \item{nn}{Number of nearest neighbors. Default is 10.} From 7075b172c4c278db2ce60671dab1217595159759 Mon Sep 17 00:00:00 2001 From: Stephanie Spielman Date: Tue, 10 Sep 2024 16:42:08 -0400 Subject: [PATCH 32/41] add test for single length args --- .../tests/testthat/test-calculate-clusters.R | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R b/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R index 251612d0d..ee7f9c27c 100644 --- a/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R +++ b/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R @@ -70,3 +70,19 @@ test_that("calculate_clusters errors as expected", { expect_error(calculate_clusters(test_mat, resolution = "string")) expect_error(calculate_clusters(test_mat, nn = "string")) }) + + + +test_that("calculate_clusters handles cluster_args with length > 1", { + expect_warning( + cluster_df <- calculate_clusters( + test_mat, + cluster_args = list(too_long = 1:10) + ) + ) + + expect_equal( + names(cluster_df), + c("cell_id", "cluster", "algorithm", "weighting", "nn", "resolution") + ) +}) From c89e3b9702a5b02caf7679641772512f0428ccb0 Mon Sep 17 00:00:00 2001 From: Stephanie Spielman Date: Tue, 10 Sep 2024 16:42:36 -0400 Subject: [PATCH 33/41] update DESCRIPTION and check appeasement --- packages/rOpenScPCA/DESCRIPTION | 4 +++- packages/rOpenScPCA/R/calculate-clusters.R | 17 ++++++++++++++--- packages/rOpenScPCA/man/calculate_clusters.Rd | 9 +++++---- 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/packages/rOpenScPCA/DESCRIPTION b/packages/rOpenScPCA/DESCRIPTION index 9da548377..2574da715 100644 --- a/packages/rOpenScPCA/DESCRIPTION +++ b/packages/rOpenScPCA/DESCRIPTION @@ -24,4 +24,6 @@ Config/testthat/edition: 3 RoxygenNote: 7.3.2 Imports: bluster, - dplyr + dplyr, + methods, + purrr diff --git a/packages/rOpenScPCA/R/calculate-clusters.R b/packages/rOpenScPCA/R/calculate-clusters.R index ceb47e198..c5c8d5a31 100644 --- a/packages/rOpenScPCA/R/calculate-clusters.R +++ b/packages/rOpenScPCA/R/calculate-clusters.R @@ -2,10 +2,10 @@ #' #' @param mat Matrix, usually of PCs, where each row is a cell. Matrix must have rownames of cell ids (e.g., barcodes) #' @param algorithm Clustering algorithm to use. Must be one of "louvain" (default), "walktrap", or "leiden". -#' Be aware that the default of "louvain" is different from the {bluster} package default of "walktrap". This difference is +#' Be aware that the default of "louvain" is different from the bluster package default of "walktrap". This difference is #' because louvain clustering is more commonly-used in scRNA-seq analysis. #' @param weighting Weighting scheme to use. Must be one of "jaccard" (default), "rank", or "number" -#' Be aware that the default of "jaccard" is different from the {bluster} package default of "rank". +#' Be aware that the default of "jaccard" is different from the bluster package default of "rank". #' This difference is because jaccard weighting is more commonly-used in scRNA-seq analysis. #' @param nn Number of nearest neighbors. Default is 10. #' @param resolution Resolution parameter used by louvain and leiden clustering only. Default is 1. @@ -61,10 +61,21 @@ calculate_clusters <- function( if (length(cluster_args)) { stopifnot( - "`cluster_args` must be a named list." = is.list(cluster_args) && !("" %in% allNames(cluster_args)) + "`cluster_args` must be a named list." = is.list(cluster_args) && !("" %in% methods::allNames(cluster_args)) ) } + # Do not use any arguments whose length is >1 + if (any(sapply(cluster_args, length) > 1)) { + warning("Only single-length values in 'cluster_args' will be used.") + + cluster_args <- cluster_args |> + purrr::keep( + \(arg) { + length(arg) == 1 + } + ) + } # Update cluster_args list with parameters that users can directly provide # note that clusterRows throws an error if this list has a param not used by the chosen algorithm diff --git a/packages/rOpenScPCA/man/calculate_clusters.Rd b/packages/rOpenScPCA/man/calculate_clusters.Rd index b94cb48a3..669b67725 100644 --- a/packages/rOpenScPCA/man/calculate_clusters.Rd +++ b/packages/rOpenScPCA/man/calculate_clusters.Rd @@ -19,11 +19,11 @@ calculate_clusters( \item{mat}{Matrix, usually of PCs, where each row is a cell. Matrix must have rownames of cell ids (e.g., barcodes)} \item{algorithm}{Clustering algorithm to use. Must be one of "louvain" (default), "walktrap", or "leiden". -Be aware that the default of "louvain" is different from the {bluster} package default of "walktrap". This difference is +Be aware that the default of "louvain" is different from the bluster package default of "walktrap". This difference is because louvain clustering is more commonly-used in scRNA-seq analysis.} \item{weighting}{Weighting scheme to use. Must be one of "jaccard" (default), "rank", or "number" -Be aware that the default of "jaccard" is different from the {bluster} package default of "rank". +Be aware that the default of "jaccard" is different from the bluster package default of "rank". This difference is because jaccard weighting is more commonly-used in scRNA-seq analysis.} \item{nn}{Number of nearest neighbors. Default is 10.} @@ -32,14 +32,15 @@ This difference is because jaccard weighting is more commonly-used in scRNA-seq \item{objective_function}{Leiden-specific parameter for whether to use the Constant Potts Model ("CPM"; default) or "modularity"} -\item{cluster_args}{List of additional arguments to pass to the chosen clustering function. See igraph documentation for details: https://igraph.org/r/html/latest} +\item{cluster_args}{List of additional arguments to pass to the chosen clustering function. Only single-length values will be used. +See igraph documentation for details on each clustering function: https://igraph.org/r/html/latest} \item{seed}{Random seed to set for clustering. Default is 2024.} } \value{ A data frame of cluster results with columns `cell_id` and `cluster`. Additional columns represent algorithm parameters and include at least: `algorithm`, `weighting`, and `nn`. Louvain and leiden clustering will also include `resolution`, and - leiden clustering will further include `objective_function`. Any other values passed to `cluster_args` will also be included. + leiden clustering will further include `objective_function`. } \description{ Calculate graph-based clusters from a provided matrix From e536f021ce4b045a2e78a8e130c6b44d2092d321 Mon Sep 17 00:00:00 2001 From: Stephanie Spielman Date: Tue, 10 Sep 2024 16:44:39 -0400 Subject: [PATCH 34/41] Update packages/rOpenScPCA/R/calculate-clusters.R Co-authored-by: Joshua Shapiro --- packages/rOpenScPCA/R/calculate-clusters.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/rOpenScPCA/R/calculate-clusters.R b/packages/rOpenScPCA/R/calculate-clusters.R index c5c8d5a31..ee60ac20b 100644 --- a/packages/rOpenScPCA/R/calculate-clusters.R +++ b/packages/rOpenScPCA/R/calculate-clusters.R @@ -108,7 +108,7 @@ calculate_clusters <- function( nn = nn ) |> dplyr::bind_cols( - dplyr::bind_rows(cluster_args) + data.frame(cluster_args) ) return(cluster_df) From bcc92547192b5f174fdc7c1562f477f64d5c2464 Mon Sep 17 00:00:00 2001 From: Stephanie Spielman Date: Wed, 11 Sep 2024 10:44:40 -0400 Subject: [PATCH 35/41] Apply suggestions from code review Co-authored-by: Joshua Shapiro --- packages/rOpenScPCA/R/calculate-clusters.R | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/packages/rOpenScPCA/R/calculate-clusters.R b/packages/rOpenScPCA/R/calculate-clusters.R index ee60ac20b..32f63edc9 100644 --- a/packages/rOpenScPCA/R/calculate-clusters.R +++ b/packages/rOpenScPCA/R/calculate-clusters.R @@ -1,5 +1,10 @@ #' Calculate graph-based clusters from a provided matrix #' +#' This function is provided to simplify application of some bluster functions for OpenScPCA data. +#' Note that defaults for some arguments may differ from the bluster defaults. +#' In particular, the clustering algorithm defaults to "louvain" and the distance measure to "jaccard" +#' to align with common practice. +#' #' @param mat Matrix, usually of PCs, where each row is a cell. Matrix must have rownames of cell ids (e.g., barcodes) #' @param algorithm Clustering algorithm to use. Must be one of "louvain" (default), "walktrap", or "leiden". #' Be aware that the default of "louvain" is different from the bluster package default of "walktrap". This difference is @@ -61,22 +66,11 @@ calculate_clusters <- function( if (length(cluster_args)) { stopifnot( - "`cluster_args` must be a named list." = is.list(cluster_args) && !("" %in% methods::allNames(cluster_args)) + "`cluster_args` must be a named list." = is.list(cluster_args) && !("" %in% methods::allNames(cluster_args)), + "`cluster_args` elements must all have only a single value" = all(sapply(cluster_args, length) == 1)) ) } - # Do not use any arguments whose length is >1 - if (any(sapply(cluster_args, length) > 1)) { - warning("Only single-length values in 'cluster_args' will be used.") - - cluster_args <- cluster_args |> - purrr::keep( - \(arg) { - length(arg) == 1 - } - ) - } - # Update cluster_args list with parameters that users can directly provide # note that clusterRows throws an error if this list has a param not used by the chosen algorithm if (algorithm %in% c("louvain", "leiden")) { From d904171b3abebab0bbe88ccac1c2d6172322c516 Mon Sep 17 00:00:00 2001 From: Stephanie Spielman Date: Wed, 11 Sep 2024 10:48:46 -0400 Subject: [PATCH 36/41] docs and syntax updates --- packages/rOpenScPCA/R/calculate-clusters.R | 15 ++++++--------- packages/rOpenScPCA/man/calculate_clusters.Rd | 14 +++++++------- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/packages/rOpenScPCA/R/calculate-clusters.R b/packages/rOpenScPCA/R/calculate-clusters.R index 32f63edc9..8e06cab61 100644 --- a/packages/rOpenScPCA/R/calculate-clusters.R +++ b/packages/rOpenScPCA/R/calculate-clusters.R @@ -1,17 +1,14 @@ #' Calculate graph-based clusters from a provided matrix #' -#' This function is provided to simplify application of some bluster functions for OpenScPCA data. -#' Note that defaults for some arguments may differ from the bluster defaults. -#' In particular, the clustering algorithm defaults to "louvain" and the distance measure to "jaccard" -#' to align with common practice. +#' This function is provided to simplify application of bluster package clustering functions on OpenScPCA data. +#' In particular, this function runs bluster::clusterRows() with the bluster::NNGraphParam() function. +#' Note that defaults for some arguments may differ from the bluster::NNGraphParam() defaults. +#' Specifically, the clustering algorithm defaults to "louvain" and the distance measure to "jaccard" +#' to align with common practice in scRNA-seq anaylsis. #' #' @param mat Matrix, usually of PCs, where each row is a cell. Matrix must have rownames of cell ids (e.g., barcodes) #' @param algorithm Clustering algorithm to use. Must be one of "louvain" (default), "walktrap", or "leiden". -#' Be aware that the default of "louvain" is different from the bluster package default of "walktrap". This difference is -#' because louvain clustering is more commonly-used in scRNA-seq analysis. #' @param weighting Weighting scheme to use. Must be one of "jaccard" (default), "rank", or "number" -#' Be aware that the default of "jaccard" is different from the bluster package default of "rank". -#' This difference is because jaccard weighting is more commonly-used in scRNA-seq analysis. #' @param nn Number of nearest neighbors. Default is 10. #' @param resolution Resolution parameter used by louvain and leiden clustering only. Default is 1. #' @param objective_function Leiden-specific parameter for whether to use the Constant Potts Model ("CPM"; default) or "modularity" @@ -67,7 +64,7 @@ calculate_clusters <- function( if (length(cluster_args)) { stopifnot( "`cluster_args` must be a named list." = is.list(cluster_args) && !("" %in% methods::allNames(cluster_args)), - "`cluster_args` elements must all have only a single value" = all(sapply(cluster_args, length) == 1)) + "`cluster_args` elements must all have only a single value" = all(sapply(cluster_args, length) == 1) ) } diff --git a/packages/rOpenScPCA/man/calculate_clusters.Rd b/packages/rOpenScPCA/man/calculate_clusters.Rd index 669b67725..931d5950e 100644 --- a/packages/rOpenScPCA/man/calculate_clusters.Rd +++ b/packages/rOpenScPCA/man/calculate_clusters.Rd @@ -18,13 +18,9 @@ calculate_clusters( \arguments{ \item{mat}{Matrix, usually of PCs, where each row is a cell. Matrix must have rownames of cell ids (e.g., barcodes)} -\item{algorithm}{Clustering algorithm to use. Must be one of "louvain" (default), "walktrap", or "leiden". -Be aware that the default of "louvain" is different from the bluster package default of "walktrap". This difference is -because louvain clustering is more commonly-used in scRNA-seq analysis.} +\item{algorithm}{Clustering algorithm to use. Must be one of "louvain" (default), "walktrap", or "leiden".} -\item{weighting}{Weighting scheme to use. Must be one of "jaccard" (default), "rank", or "number" -Be aware that the default of "jaccard" is different from the bluster package default of "rank". -This difference is because jaccard weighting is more commonly-used in scRNA-seq analysis.} +\item{weighting}{Weighting scheme to use. Must be one of "jaccard" (default), "rank", or "number"} \item{nn}{Number of nearest neighbors. Default is 10.} @@ -43,7 +39,11 @@ A data frame of cluster results with columns `cell_id` and `cluster`. Additional leiden clustering will further include `objective_function`. } \description{ -Calculate graph-based clusters from a provided matrix +This function is provided to simplify application of bluster package clustering functions on OpenScPCA data. +In particular, this function runs bluster::clusterRows() with the bluster::NNGraphParam() function. +Note that defaults for some arguments may differ from the bluster::NNGraphParam() defaults. +Specifically, the clustering algorithm defaults to "louvain" and the distance measure to "jaccard" +to align with common practice in scRNA-seq anaylsis. } \examples{ \dontrun{ From 0cdfe03269c5ad82ada0f67a17dacd1a2383ed19 Mon Sep 17 00:00:00 2001 From: Stephanie Spielman Date: Wed, 11 Sep 2024 10:49:41 -0400 Subject: [PATCH 37/41] remove purrr dependency --- packages/rOpenScPCA/DESCRIPTION | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/packages/rOpenScPCA/DESCRIPTION b/packages/rOpenScPCA/DESCRIPTION index 2574da715..ff59937d6 100644 --- a/packages/rOpenScPCA/DESCRIPTION +++ b/packages/rOpenScPCA/DESCRIPTION @@ -25,5 +25,4 @@ RoxygenNote: 7.3.2 Imports: bluster, dplyr, - methods, - purrr + methods From e7b22cc6f1bc488fceab90571a4c2ffcf75ba413 Mon Sep 17 00:00:00 2001 From: Stephanie Spielman Date: Wed, 11 Sep 2024 10:49:59 -0400 Subject: [PATCH 38/41] update test to expect_error --- .../tests/testthat/test-calculate-clusters.R | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R b/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R index ee7f9c27c..b150c4e8d 100644 --- a/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R +++ b/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R @@ -69,20 +69,10 @@ test_that("calculate_clusters errors as expected", { expect_error(calculate_clusters("not a matrix")) expect_error(calculate_clusters(test_mat, resolution = "string")) expect_error(calculate_clusters(test_mat, nn = "string")) -}) - - - -test_that("calculate_clusters handles cluster_args with length > 1", { - expect_warning( - cluster_df <- calculate_clusters( + expect_error( + calculate_clusters( test_mat, cluster_args = list(too_long = 1:10) ) ) - - expect_equal( - names(cluster_df), - c("cell_id", "cluster", "algorithm", "weighting", "nn", "resolution") - ) }) From b2b4cbe08a5ef360f3303e4231223fb9da1c45e6 Mon Sep 17 00:00:00 2001 From: Stephanie Spielman Date: Wed, 11 Sep 2024 13:46:59 -0400 Subject: [PATCH 39/41] Apply suggestions from code review Co-authored-by: Joshua Shapiro --- packages/rOpenScPCA/R/calculate-clusters.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/rOpenScPCA/R/calculate-clusters.R b/packages/rOpenScPCA/R/calculate-clusters.R index 8e06cab61..83c76f767 100644 --- a/packages/rOpenScPCA/R/calculate-clusters.R +++ b/packages/rOpenScPCA/R/calculate-clusters.R @@ -13,8 +13,10 @@ #' @param resolution Resolution parameter used by louvain and leiden clustering only. Default is 1. #' @param objective_function Leiden-specific parameter for whether to use the Constant Potts Model ("CPM"; default) or "modularity" #' @param seed Random seed to set for clustering. Default is 2024. -#' @param cluster_args List of additional arguments to pass to the chosen clustering function. Only single-length values will be used. +#' @param cluster_args List of additional arguments to pass to the chosen clustering function. +#' Only single values for each argument are supported (no vectors or lists). #' See igraph documentation for details on each clustering function: https://igraph.org/r/html/latest +#' #' @return A data frame of cluster results with columns `cell_id` and `cluster`. Additional columns represent algorithm parameters #' and include at least: `algorithm`, `weighting`, and `nn`. Louvain and leiden clustering will also include `resolution`, and #' leiden clustering will further include `objective_function`. From b0a90657b7d1c3dc9e50e3ab5418b3139f4d8e8a Mon Sep 17 00:00:00 2001 From: Stephanie Spielman Date: Wed, 11 Sep 2024 13:52:03 -0400 Subject: [PATCH 40/41] wording update to be consistent with bluster docs wording --- packages/rOpenScPCA/R/calculate-clusters.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/rOpenScPCA/R/calculate-clusters.R b/packages/rOpenScPCA/R/calculate-clusters.R index 83c76f767..b457562eb 100644 --- a/packages/rOpenScPCA/R/calculate-clusters.R +++ b/packages/rOpenScPCA/R/calculate-clusters.R @@ -3,8 +3,8 @@ #' This function is provided to simplify application of bluster package clustering functions on OpenScPCA data. #' In particular, this function runs bluster::clusterRows() with the bluster::NNGraphParam() function. #' Note that defaults for some arguments may differ from the bluster::NNGraphParam() defaults. -#' Specifically, the clustering algorithm defaults to "louvain" and the distance measure to "jaccard" -#' to align with common practice in scRNA-seq anaylsis. +#' Specifically, the clustering algorithm defaults to "louvain" and the weighting scheme to "jaccard" +#' to align with common practice in scRNA-seq analysis. #' #' @param mat Matrix, usually of PCs, where each row is a cell. Matrix must have rownames of cell ids (e.g., barcodes) #' @param algorithm Clustering algorithm to use. Must be one of "louvain" (default), "walktrap", or "leiden". @@ -13,7 +13,7 @@ #' @param resolution Resolution parameter used by louvain and leiden clustering only. Default is 1. #' @param objective_function Leiden-specific parameter for whether to use the Constant Potts Model ("CPM"; default) or "modularity" #' @param seed Random seed to set for clustering. Default is 2024. -#' @param cluster_args List of additional arguments to pass to the chosen clustering function. +#' @param cluster_args List of additional arguments to pass to the chosen clustering function. #' Only single values for each argument are supported (no vectors or lists). #' See igraph documentation for details on each clustering function: https://igraph.org/r/html/latest #' From 9a8a4b12c0b0aae3e1432758612ea072b7acb439 Mon Sep 17 00:00:00 2001 From: Stephanie Spielman Date: Wed, 11 Sep 2024 13:52:09 -0400 Subject: [PATCH 41/41] document --- packages/rOpenScPCA/man/calculate_clusters.Rd | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/packages/rOpenScPCA/man/calculate_clusters.Rd b/packages/rOpenScPCA/man/calculate_clusters.Rd index 931d5950e..3006b6ba6 100644 --- a/packages/rOpenScPCA/man/calculate_clusters.Rd +++ b/packages/rOpenScPCA/man/calculate_clusters.Rd @@ -28,7 +28,8 @@ calculate_clusters( \item{objective_function}{Leiden-specific parameter for whether to use the Constant Potts Model ("CPM"; default) or "modularity"} -\item{cluster_args}{List of additional arguments to pass to the chosen clustering function. Only single-length values will be used. +\item{cluster_args}{List of additional arguments to pass to the chosen clustering function. +Only single values for each argument are supported (no vectors or lists). See igraph documentation for details on each clustering function: https://igraph.org/r/html/latest} \item{seed}{Random seed to set for clustering. Default is 2024.} @@ -42,8 +43,8 @@ A data frame of cluster results with columns `cell_id` and `cluster`. Additional This function is provided to simplify application of bluster package clustering functions on OpenScPCA data. In particular, this function runs bluster::clusterRows() with the bluster::NNGraphParam() function. Note that defaults for some arguments may differ from the bluster::NNGraphParam() defaults. -Specifically, the clustering algorithm defaults to "louvain" and the distance measure to "jaccard" -to align with common practice in scRNA-seq anaylsis. +Specifically, the clustering algorithm defaults to "louvain" and the weighting scheme to "jaccard" +to align with common practice in scRNA-seq analysis. } \examples{ \dontrun{