From 830361841f2f628d5381e8d61a1183ead011fc66 Mon Sep 17 00:00:00 2001
From: Daniel <mail@danielluedecke.de>
Date: Thu, 10 Oct 2024 16:40:30 +0200
Subject: [PATCH] Draft `row_sums()` as complement to `row_means()`

---
 NAMESPACE                       |   1 +
 NEWS.md                         |   3 +
 R/row_means.R                   | 142 ++++++++++++++++++++++----------
 man/row_means.Rd                |  55 +++++++++----
 tests/testthat/test-row_means.R |  11 ++-
 5 files changed, 150 insertions(+), 62 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index c435c0cc5..1c2edb93a 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -297,6 +297,7 @@ export(reshape_wider)
 export(reverse)
 export(reverse_scale)
 export(row_means)
+export(row_sums)
 export(row_to_colnames)
 export(rowid_as_column)
 export(rownames_as_column)
diff --git a/NEWS.md b/NEWS.md
index 388c5a822..d6976d12f 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -6,6 +6,9 @@ CHANGES
   variables, can now also be a character vector with quoted variable names,
   including a colon to indicate a range of several variables (e.g. `"cyl:gear"`).
 
+* New function `row_sums()`, to calculate row sums (optionally requiring a
+  minimum number of valid values per row), as a complement to `row_means()`.
+
 BUG FIXES
 
 * `describe_distribution()` no longer errors if the sample was too sparse to compute
diff --git a/R/row_means.R b/R/row_means.R
index 4d2876c6a..729c800be 100644
--- a/R/row_means.R
+++ b/R/row_means.R
@@ -1,15 +1,16 @@
-#' @title Row means (optionally with minimum amount of valid values)
+#' @title Row means or sums (optionally with minimum amount of valid values)
 #' @name row_means
-#' @description This function is similar to the SPSS `MEAN.n` function and computes
-#' row means from a data frame or matrix if at least `min_valid` values of a row are
-#' valid (and not `NA`).
+#' @description These functions are similar to the SPSS `MEAN.n` and `SUM.n`
+#' functions and compute row means or row sums from a data frame or matrix if
+#' at least `min_valid` values of a row are valid (and not `NA`).
 #'
-#' @param data A data frame with at least two columns, where row means are applied.
+#' @param data A data frame with at least two columns, where row means or row
+#' sums are applied.
 #' @param min_valid Optional, a numeric value of length 1. May either be
 #' - a numeric value that indicates the amount of valid values per row to
-#'   calculate the row mean;
+#'   calculate the row mean or row sum;
 #' - or a value between `0` and `1`, indicating a proportion of valid values per
-#'   row to calculate the row mean (see 'Details').
+#'   row to calculate the row mean or row sum (see 'Details').
 #' - `NULL` (default), in which all cases are considered.
 #'
 #' If a row's sum of valid values is less than `min_valid`, `NA` will be returned.
@@ -17,21 +18,24 @@
 #' used for rounding mean values. Negative values are allowed (see 'Details').
 #' By default, `digits = NULL` and no rounding is used.
 #' @param remove_na Logical, if `TRUE` (default), removes missing (`NA`) values
-#' before calculating row means. Only applies if `min_valuid` is not specified.
+#' before calculating row means or row sums. Only applies if `min_valid` is not
+#' specified.
 #' @param verbose Toggle warnings.
 #' @inheritParams extract_column_names
 #'
-#' @return A vector with row means for those rows with at least `n` valid values.
+#' @return A vector with row means (for `row_means()`) or row sums (for
+#' `row_sums()`) for those rows with at least `min_valid` valid values.
 #'
-#' @details Rounding to a negative number of `digits` means rounding to a power of
-#' ten, for example `row_means(df, 3, digits = -2)` rounds to the nearest hundred.
-#' For `min_valid`, if not `NULL`, `min_valid` must be a numeric value from `0`
-#' to `ncol(data)`. If a row in the data frame has at least `min_valid`
-#' non-missing values, the row mean is returned. If `min_valid` is a non-integer
-#' value from 0 to 1, `min_valid` is considered to indicate the proportion of
-#' required non-missing values per row. E.g., if `min_valid = 0.75`, a row must
-#' have at least `ncol(data) * min_valid` non-missing values for the row mean
-#' to be calculated. See 'Examples'.
+#' @details Rounding to a negative number of `digits` means rounding to a power
+#' of ten, for example `row_means(df, 3, digits = -2)` rounds to the nearest
+#' hundred. For `min_valid`, if not `NULL`, `min_valid` must be a numeric value
+#' from `0` to `ncol(data)`. If a row in the data frame has at least `min_valid`
+#' non-missing values, the row mean or row sum is returned. If `min_valid` is a
+#' non-integer value from 0 to 1, `min_valid` is considered to indicate the
+#' proportion of required non-missing values per row. E.g., if
+#' `min_valid = 0.75`, a row must have at least `ncol(data) * min_valid`
+#' non-missing values for the row mean or row sum to be calculated. See
+#' 'Examples'.
 #'
 #' @examples
 #' dat <- data.frame(
@@ -49,6 +53,7 @@
 #'
 #' # needs at least 4 non-missing values per row
 #' row_means(dat, min_valid = 4) # 1 valid return value
+#' row_sums(dat, min_valid = 4) # 1 valid return value
 #'
 #' # needs at least 3 non-missing values per row
 #' row_means(dat, min_valid = 3) # 2 valid return values
@@ -61,6 +66,7 @@
 #'
 #' # needs at least 50% of non-missing values per row
 #' row_means(dat, min_valid = 0.5) # 3 valid return values
+#' row_sums(dat, min_valid = 0.5)
 #'
 #' # needs at least 75% of non-missing values per row
 #' row_means(dat, min_valid = 0.75) # 2 valid return values
@@ -84,34 +90,52 @@ row_means <- function(data,
     verbose = verbose
   )
 
-  if (is.null(select) || length(select) == 0) {
-    insight::format_error("No columns selected.")
-  }
+  # prepare data, sanity checks
+  data <- .prepare_row_data(data, select, min_valid, verbose)
 
-  data <- .coerce_to_dataframe(data[select])
+  # calculate row means
+  .row_sums_or_means(data, min_valid, digits, remove_na, fun = "mean")
+}
 
-  # n must be a numeric, non-missing value
-  if (!is.null(min_valid) && (all(is.na(min_valid)) || !is.numeric(min_valid) || length(min_valid) > 1)) {
-    insight::format_error("`min_valid` must be a numeric value of length 1.")
-  }
 
-  # make sure we only have numeric values
-  numeric_columns <- vapply(data, is.numeric, TRUE)
-  if (!all(numeric_columns)) {
-    if (verbose) {
-      insight::format_alert("Only numeric columns are considered for calculation.")
-    }
-    data <- data[numeric_columns]
-  }
+#' @rdname row_means
+#' @export
+row_sums <- function(data,
+                     select = NULL,
+                     exclude = NULL,
+                     min_valid = NULL,
+                     digits = NULL,
+                     ignore_case = FALSE,
+                     regex = FALSE,
+                     remove_na = FALSE,
+                     verbose = TRUE) {
+  # resolve the `select`/`exclude` specification via `.select_nse()`
+  select <- .select_nse(select,
+    data,
+    exclude,
+    ignore_case = ignore_case,
+    regex = regex,
+    verbose = verbose
+  )
+
+  # subset to the selected columns and validate `min_valid` (errors if invalid)
+  data <- .prepare_row_data(data, select, min_valid, verbose)
+
+  # delegate to the helper shared with `row_means()`, requesting sums
+  .row_sums_or_means(data, min_valid, digits, remove_na, fun = "sum")
+}
 
-  # check if we have a data framme with at least two columns
-  if (ncol(data) < 2) {
-    insight::format_error("`data` must be a data frame with at least two numeric columns.")
-  }
 
-  # proceed here if min_valid is not NULL
+# helper ------------------------
+
+# calculate row means or sums
+.row_sums_or_means <- function(data, min_valid, digits, remove_na, fun) {
   if (is.null(min_valid)) {
-    out <- rowMeans(data, na.rm = remove_na)
+    # calculate row means or sums for complete data
+    out <- switch(fun,
+      mean = rowMeans(data, na.rm = remove_na),
+      rowSums(data, na.rm = remove_na)
+    )
   } else {
     # is 'min_valid' indicating a proportion?
     decimals <- min_valid %% 1
@@ -124,9 +148,12 @@ row_means <- function(data,
       insight::format_error("`min_valid` must be smaller or equal to number of columns in data frame.")
     }
 
-    # row means
+    # row means or sums
     to_na <- rowSums(is.na(data)) > ncol(data) - min_valid
-    out <- rowMeans(data, na.rm = TRUE)
+    out <- switch(fun,
+      mean = rowMeans(data, na.rm = TRUE),
+      rowSums(data, na.rm = TRUE)
+    )
     out[to_na] <- NA
   }
 
@@ -137,3 +164,34 @@ row_means <- function(data,
 
   out
 }
+
+
+# validate inputs for row means/sums; returns `data` reduced to numeric columns
+.prepare_row_data <- function(data, select, min_valid, verbose) {
+  if (is.null(select) || length(select) == 0) {
+    insight::format_error("No columns selected.")
+  }
+
+  data <- .coerce_to_dataframe(data[select])
+
+  # `min_valid`, if given, must be a single, non-missing numeric value
+  if (!is.null(min_valid) && (all(is.na(min_valid)) || !is.numeric(min_valid) || length(min_valid) > 1)) {
+    insight::format_error("`min_valid` must be a numeric value of length 1.")
+  }
+
+  # keep only numeric columns; alert about dropped ones if `verbose`
+  numeric_columns <- vapply(data, is.numeric, TRUE)
+  if (!all(numeric_columns)) {
+    if (verbose) {
+      insight::format_alert("Only numeric columns are considered for calculation.")
+    }
+    data <- data[numeric_columns]
+  }
+
+  # at least two numeric columns must remain after filtering
+  if (ncol(data) < 2) {
+    insight::format_error("`data` must be a data frame with at least two numeric columns.")
+  }
+
+  data
+}
diff --git a/man/row_means.Rd b/man/row_means.Rd
index 4d61e8cf2..43d85b5b0 100644
--- a/man/row_means.Rd
+++ b/man/row_means.Rd
@@ -2,7 +2,8 @@
 % Please edit documentation in R/row_means.R
 \name{row_means}
 \alias{row_means}
-\title{Row means (optionally with minimum amount of valid values)}
+\alias{row_sums}
+\title{Row means or sums (optionally with minimum amount of valid values)}
 \usage{
 row_means(
   data,
@@ -15,9 +16,22 @@ row_means(
   remove_na = FALSE,
   verbose = TRUE
 )
+
+row_sums(
+  data,
+  select = NULL,
+  exclude = NULL,
+  min_valid = NULL,
+  digits = NULL,
+  ignore_case = FALSE,
+  regex = FALSE,
+  remove_na = FALSE,
+  verbose = TRUE
+)
 }
 \arguments{
-\item{data}{A data frame with at least two columns, where row means are applied.}
+\item{data}{A data frame with at least two columns, where row means or row
+sums are applied.}
 
 \item{select}{Variables that will be included when performing the required
 tasks. Can be either
@@ -60,9 +74,9 @@ excludes no columns.}
 \item{min_valid}{Optional, a numeric value of length 1. May either be
 \itemize{
 \item a numeric value that indicates the amount of valid values per row to
-calculate the row mean;
+calculate the row mean or row sum;
 \item or a value between \code{0} and \code{1}, indicating a proportion of valid values per
-row to calculate the row mean (see 'Details').
+row to calculate the row mean or row sum (see 'Details').
 \item \code{NULL} (default), in which all cases are considered.
 }
 
@@ -86,28 +100,31 @@ since the select-helpers may not work when called from inside other
 functions (see 'Details'), this argument may be used as workaround.}
 
 \item{remove_na}{Logical, if \code{TRUE} (default), removes missing (\code{NA}) values
-before calculating row means. Only applies if \code{min_valuid} is not specified.}
+before calculating row means or row sums. Only applies if \code{min_valid} is not
+specified.}
 
 \item{verbose}{Toggle warnings.}
 }
 \value{
-A vector with row means for those rows with at least \code{n} valid values.
+A vector with row means (for \code{row_means()}) or row sums (for
+\code{row_sums()}) for those rows with at least \code{min_valid} valid values.
 }
 \description{
-This function is similar to the SPSS \code{MEAN.n} function and computes
-row means from a data frame or matrix if at least \code{min_valid} values of a row are
-valid (and not \code{NA}).
+These functions are similar to the SPSS \code{MEAN.n} and \code{SUM.n}
+functions and compute row means or row sums from a data frame or matrix if
+at least \code{min_valid} values of a row are valid (and not \code{NA}).
 }
 \details{
-Rounding to a negative number of \code{digits} means rounding to a power of
-ten, for example \code{row_means(df, 3, digits = -2)} rounds to the nearest hundred.
-For \code{min_valid}, if not \code{NULL}, \code{min_valid} must be a numeric value from \code{0}
-to \code{ncol(data)}. If a row in the data frame has at least \code{min_valid}
-non-missing values, the row mean is returned. If \code{min_valid} is a non-integer
-value from 0 to 1, \code{min_valid} is considered to indicate the proportion of
-required non-missing values per row. E.g., if \code{min_valid = 0.75}, a row must
-have at least \code{ncol(data) * min_valid} non-missing values for the row mean
-to be calculated. See 'Examples'.
+Rounding to a negative number of \code{digits} means rounding to a power
+of ten, for example \code{row_means(df, 3, digits = -2)} rounds to the nearest
+hundred. For \code{min_valid}, if not \code{NULL}, \code{min_valid} must be a numeric value
+from \code{0} to \code{ncol(data)}. If a row in the data frame has at least \code{min_valid}
+non-missing values, the row mean or row sum is returned. If \code{min_valid} is a
+non-integer value from 0 to 1, \code{min_valid} is considered to indicate the
+proportion of required non-missing values per row. E.g., if
+\code{min_valid = 0.75}, a row must have at least \code{ncol(data) * min_valid}
+non-missing values for the row mean or row sum to be calculated. See
+'Examples'.
 }
 \examples{
 dat <- data.frame(
@@ -125,6 +142,7 @@ row_means(dat, remove_na = TRUE)
 
 # needs at least 4 non-missing values per row
 row_means(dat, min_valid = 4) # 1 valid return value
+row_sums(dat, min_valid = 4) # 1 valid return value
 
 # needs at least 3 non-missing values per row
 row_means(dat, min_valid = 3) # 2 valid return values
@@ -137,6 +155,7 @@ row_means(dat, select = c("c1", "c3"), min_valid = 1)
 
 # needs at least 50\% of non-missing values per row
 row_means(dat, min_valid = 0.5) # 3 valid return values
+row_sums(dat, min_valid = 0.5)
 
 # needs at least 75\% of non-missing values per row
 row_means(dat, min_valid = 0.75) # 2 valid return values
diff --git a/tests/testthat/test-row_means.R b/tests/testthat/test-row_means.R
index 8d0504c69..4db0d7039 100644
--- a/tests/testthat/test-row_means.R
+++ b/tests/testthat/test-row_means.R
@@ -1,4 +1,4 @@
-test_that("row_means", {
+test_that("row_means/sums", {
   d_mn <- data.frame(
     c1 = c(1, 2, NA, 4),
     c2 = c(NA, 2, NA, 5),
@@ -14,14 +14,21 @@ test_that("row_means", {
   expect_equal(row_means(d_mn, min_valid = 2, digits = 1), c(1.5, 2.8, NA, 5.7), tolerance = 1e-1)
   expect_message(row_means(iris), regex = "Only numeric")
   expect_equal(row_means(iris, verbose = FALSE), rowMeans(iris[, 1:4]), tolerance = 1e-3, ignore_attr = TRUE)
+  expect_equal(row_sums(d_mn, min_valid = 4), c(NA, 11, NA, NA), tolerance = 1e-3)
+  expect_equal(row_sums(d_mn, min_valid = 3), c(NA, 11, NA, 17), tolerance = 1e-3)
+  expect_message(row_sums(iris), regex = "Only numeric")
 })
 
-test_that("row_means, errors or messages", {
+test_that("row_means/sums, errors or messages", {
   data(iris)
   expect_error(expect_warning(row_means(iris, select = "abc")), regex = "No columns")
+  expect_error(expect_warning(row_sums(iris, select = "abc")), regex = "No columns")
   expect_error(row_means(iris[1], min_valid = 1), regex = "two numeric")
   expect_error(row_means(iris, min_valid = 1:4), regex = "numeric value")
   expect_error(row_means(iris, min_valid = "a"), regex = "numeric value")
   expect_message(row_means(iris[1:3, ], min_valid = 3), regex = "Only numeric")
   expect_silent(row_means(iris[1:3, ], min_valid = 3, verbose = FALSE))
+  expect_error(row_sums(iris[1], min_valid = 1), regex = "two numeric")
+  expect_message(row_sums(iris[1:3, ], min_valid = 3), regex = "Only numeric")
+  expect_silent(row_sums(iris[1:3, ], min_valid = 3, verbose = FALSE))
 })