From c967b6c9aecfb025e6dae850e037785ba65424cc Mon Sep 17 00:00:00 2001
From: Daniel <mail@danielluedecke.de>
Date: Wed, 20 Mar 2024 18:56:31 +0100
Subject: [PATCH 01/16] Draft `data_expand()`

---
 NAMESPACE          |  1 +
 R/data_expand.R    | 69 ++++++++++++++++++++++++++++++++++++
 _pkgdown.yaml      |  1 +
 man/data_expand.Rd | 88 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 159 insertions(+)
 create mode 100644 R/data_expand.R
 create mode 100644 man/data_expand.Rd

diff --git a/NAMESPACE b/NAMESPACE
index e89863807..afd84cb87 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -228,6 +228,7 @@ export(data_adjust)
 export(data_arrange)
 export(data_codebook)
 export(data_duplicated)
+export(data_expand)
 export(data_extract)
 export(data_filter)
 export(data_find)
diff --git a/R/data_expand.R b/R/data_expand.R
new file mode 100644
index 000000000..e0d528031
--- /dev/null
+++ b/R/data_expand.R
@@ -0,0 +1,69 @@
+#' @title Expand (i.e. replicate rows) a data frame
+#' @name data_expand
+#'
+#' @description
+#' Expand a data frame by replicating rows based on another variable that
+#' contains the counts of replications per row.
+#'
+#' @param data A data frame.
+#' @param expand The name of the column that contains the counts of replications
+#' for each row.
+#' @param ... Currently not used.
+#' @inheritParams find_columns
+#'
+#' @return `data`, with each row replicated as many times as defined in `expand`.
+#'
+#' @examples
+#' data(mtcars)
+#' data_expand(head(mtcars), "carb")
+#' @export
+data_expand <- function(data,
+                        expand = NULL,
+                        select = NULL,
+                        exclude = NULL,
+                        remove_na = FALSE,
+                        ignore_case = FALSE,
+                        verbose = TRUE,
+                        regex = FALSE,
+                        ...) {
+  # we need a name for the new column
+  if (is.null(expand)) {
+    insight::format_error(
+      "No column that should be used to expand the data frame was provided. Please use `expand` to define a column."
+    )
+  }
+
+  # only one column name
+  if (length(expand) > 1) {
+    insight::format_error(
+      "Please provide only a single string for `expand`, no character vector with multiple values."
+    )
+  }
+
+  # check if in data
+  if (!expand %in% colnames(data)) {
+    insight::format_error(
+      "The column provided in `expand` does not exist in the data frame.",
+      .misspelled_string(colnames(data), expand, "Possibly misspelled?")
+    )
+  }
+
+  # evaluate select/exclude, may be select-helpers
+  select <- .select_nse(select,
+    data,
+    exclude,
+    ignore_case,
+    regex = regex,
+    verbose = verbose
+  )
+
+  # extract variable that contains the counts of replicates
+  replicates <- data[[expand]]
+  # we can remove that column now
+  data[[replicates]] <- NULL
+
+  # fin
+  as.data.frame(do.call(cbind, lapply(data[select], function(variable) {
+    unlist(Map(rep, variable, replicates), use.names = FALSE)
+  })))
+}
diff --git a/_pkgdown.yaml b/_pkgdown.yaml
index 2adc0768c..93043c95a 100644
--- a/_pkgdown.yaml
+++ b/_pkgdown.yaml
@@ -20,6 +20,7 @@ reference:
       - data_partition
       - data_rotate
       - data_group
+      - data_expand
       - data_duplicated
       - data_unique
 
diff --git a/man/data_expand.Rd b/man/data_expand.Rd
new file mode 100644
index 000000000..b3f14e19d
--- /dev/null
+++ b/man/data_expand.Rd
@@ -0,0 +1,88 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data_expand.R
+\name{data_expand}
+\alias{data_expand}
+\title{Expand (i.e. replicate rows) a data frame}
+\usage{
+data_expand(
+  data,
+  expand = NULL,
+  select = NULL,
+  exclude = NULL,
+  remove_na = FALSE,
+  ignore_case = FALSE,
+  verbose = TRUE,
+  regex = FALSE,
+  ...
+)
+}
+\arguments{
+\item{data}{A data frame.}
+
+\item{expand}{The name of the column that contains the counts of replications
+for each row.}
+
+\item{select}{Variables that will be included when performing the required
+tasks. Can be either
+\itemize{
+\item a variable specified as a literal variable name (e.g., \code{column_name}),
+\item a string with the variable name (e.g., \code{"column_name"}), or a character
+vector of variable names (e.g., \code{c("col1", "col2", "col3")}),
+\item a formula with variable names (e.g., \code{~column_1 + column_2}),
+\item a vector of positive integers, giving the positions counting from the left
+(e.g. \code{1} or \code{c(1, 3, 5)}),
+\item a vector of negative integers, giving the positions counting from the
+right (e.g., \code{-1} or \code{-1:-3}),
+\item one of the following select-helpers: \code{starts_with()}, \code{ends_with()},
+\code{contains()}, a range using \code{:} or \code{regex("")}. \code{starts_with()},
+\code{ends_with()}, and  \code{contains()} accept several patterns, e.g
+\code{starts_with("Sep", "Petal")}.
+\item or a function testing for logical conditions, e.g. \code{is.numeric()} (or
+\code{is.numeric}), or any user-defined function that selects the variables
+for which the function returns \code{TRUE} (like: \code{foo <- function(x) mean(x) > 3}),
+\item ranges specified via literal variable names, select-helpers (except
+\code{regex()}) and (user-defined) functions can be negated, i.e. return
+non-matching elements, when prefixed with a \code{-}, e.g. \code{-ends_with("")},
+\code{-is.numeric} or \code{-(Sepal.Width:Petal.Length)}. \strong{Note:} Negation means
+that matches are \emph{excluded}, and thus, the \code{exclude} argument can be
+used alternatively. For instance, \code{select=-ends_with("Length")} (with
+\code{-}) is equivalent to \code{exclude=ends_with("Length")} (no \code{-}). In case
+negation should not work as expected, use the \code{exclude} argument instead.
+}
+
+If \code{NULL}, selects all columns. Patterns that found no matches are silently
+ignored, e.g. \code{find_columns(iris, select = c("Species", "Test"))} will just
+return \code{"Species"}.}
+
+\item{exclude}{See \code{select}, however, column names matched by the pattern
+from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default),
+excludes no columns.}
+
+\item{ignore_case}{Logical, if \code{TRUE} and when one of the select-helpers or
+a regular expression is used in \code{select}, ignores lower/upper case in the
+search pattern when matching against variable names.}
+
+\item{verbose}{Toggle warnings.}
+
+\item{regex}{Logical, if \code{TRUE}, the search pattern from \code{select} will be
+treated as regular expression. When \code{regex = TRUE}, select \emph{must} be a
+character string (or a variable containing a character string) and is not
+allowed to be one of the supported select-helpers or a character vector
+of length > 1. \code{regex = TRUE} is comparable to using one of the two
+select-helpers, \code{select = contains("")} or \code{select = regex("")}, however,
+since the select-helpers may not work when called from inside other
+functions (see 'Details'), this argument may be used as workaround.}
+
+\item{...}{Currently not used.}
+}
+\value{
+\code{data}, with each row replicated as many times as defined in \code{expand}.
+}
+\description{
+Expand a data frame by replicating rows based on another variable that
+contains the counts of replications per row.
+}
+\examples{
+data(mtcars)
+data_expand(head(mtcars), "carb")
+}

From 1ccfd420db053c59835b160774507e5826b149b0 Mon Sep 17 00:00:00 2001
From: Daniel <mail@danielluedecke.de>
Date: Wed, 20 Mar 2024 19:03:51 +0100
Subject: [PATCH 02/16] fix

---
 R/data_expand.R | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/R/data_expand.R b/R/data_expand.R
index e0d528031..359f9a6d6 100644
--- a/R/data_expand.R
+++ b/R/data_expand.R
@@ -60,7 +60,10 @@ data_expand <- function(data,
   # extract variable that contains the counts of replicates
   replicates <- data[[expand]]
   # we can remove that column now
-  data[[replicates]] <- NULL
+  data[[expand]] <- NULL
+
+  # also remove "expand" from "select" string
+  select <- setdiff(select, expand)
 
   # fin
   as.data.frame(do.call(cbind, lapply(data[select], function(variable) {

From ef5e1e656bf5806b6c04bd10ebf661e8349ee9e0 Mon Sep 17 00:00:00 2001
From: Daniel <mail@danielluedecke.de>
Date: Wed, 20 Mar 2024 19:19:12 +0100
Subject: [PATCH 03/16] add tests

---
 R/data_expand.R                   | 16 ++++++++++++++
 man/data_expand.Rd                |  4 ++++
 tests/testthat/test-data_expand.R | 35 +++++++++++++++++++++++++++++++
 3 files changed, 55 insertions(+)
 create mode 100644 tests/testthat/test-data_expand.R

diff --git a/R/data_expand.R b/R/data_expand.R
index 359f9a6d6..18fe0545e 100644
--- a/R/data_expand.R
+++ b/R/data_expand.R
@@ -8,6 +8,9 @@
 #' @param data A data frame.
 #' @param expand The name of the column that contains the counts of replications
 #' for each row.
+#' @param remove_na Logical, if `TRUE`, missing values (`NA`) in the column
+#' provided in `expand` are removed from the data frame. If `FALSE` and `expand`
+#' contains missing values, the function will throw an error.
 #' @param ... Currently not used.
 #' @inheritParams find_columns
 #'
@@ -65,6 +68,19 @@ data_expand <- function(data,
   # also remove "expand" from "select" string
   select <- setdiff(select, expand)
 
+  # if user doesn't want to remove "NA", but replicates contain "NA",
+  # give informative error here
+  if (!remove_na && anyNA(replicates)) {
+    insight::format_error(
+      "The column provided in `expand` contains missing values, but `remove_na` is set to `FALSE`.",
+      "Please set `remove_na` to `TRUE` or remove the missing values from the data frame."
+    )
+  }
+
+  # remove rows where "expand" is NA
+  data <- data[!is.na(replicates), ]
+  replicates <- replicates[!is.na(replicates)]
+
   # fin
   as.data.frame(do.call(cbind, lapply(data[select], function(variable) {
     unlist(Map(rep, variable, replicates), use.names = FALSE)
diff --git a/man/data_expand.Rd b/man/data_expand.Rd
index b3f14e19d..0340b89d9 100644
--- a/man/data_expand.Rd
+++ b/man/data_expand.Rd
@@ -58,6 +58,10 @@ return \code{"Species"}.}
 from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default),
 excludes no columns.}
 
+\item{remove_na}{Logical, if \code{TRUE}, missing values (\code{NA}) in the column
+provided in \code{expand} are removed from the data frame. If \code{FALSE} and \code{expand}
+contains missing values, the function will throw an error.}
+
 \item{ignore_case}{Logical, if \code{TRUE} and when one of the select-helpers or
 a regular expression is used in \code{select}, ignores lower/upper case in the
 search pattern when matching against variable names.}
diff --git a/tests/testthat/test-data_expand.R b/tests/testthat/test-data_expand.R
new file mode 100644
index 000000000..9bb5b8196
--- /dev/null
+++ b/tests/testthat/test-data_expand.R
@@ -0,0 +1,35 @@
+test_that("data_expand: simple use case", {
+  data(mtcars)
+  d <- as.data.frame(head(mtcars))
+  out <- data_expand(d, "carb")
+  expect_identical(dim(out), c(13L, 10L))
+  expect_identical(out$disp, c(160, 160, 160, 160, 160, 160, 160, 160, 108, 258, 360, 360, 225))
+  expect_named(out, c("mpg", "cyl", "disp", "hp", "drat", "wt", "qsec", "vs", "am", "gear"))
+
+  d$mpg[5] <- NA
+  out <- data_expand(d, "carb")
+  expect_identical(dim(out), c(13L, 10L))
+  expect_identical(out$mpg, c(21, 21, 21, 21, 21, 21, 21, 21, 22.8, 21.4, NA, NA, 18.1))
+  expect_named(out, c("mpg", "cyl", "disp", "hp", "drat", "wt", "qsec", "vs", "am", "gear"))
+
+  d$carb[3] <- NA
+  out <- data_expand(d, "carb", remove_na = TRUE)
+  expect_identical(dim(out), c(12L, 10L))
+  expect_identical(out$mpg, c(21, 21, 21, 21, 21, 21, 21, 21, 21.4, NA, NA, 18.1))
+  expect_named(out, c("mpg", "cyl", "disp", "hp", "drat", "wt", "qsec", "vs", "am", "gear"))
+
+  out <- data_expand(d, "carb", select = c("disp", "hp"), remove_na = TRUE)
+  expect_identical(dim(out), c(12L, 2L))
+  expect_identical(out$disp, c(160, 160, 160, 160, 160, 160, 160, 160, 258, 360, 360, 225))
+  expect_named(out, c("disp", "hp"))
+})
+
+test_that("data_expand: errors", {
+  data(mtcars)
+  d <- as.data.frame(head(mtcars))
+  expect_error(data_expand(d), regex = "No column")
+  expect_error(data_expand(d, expand = c("mpg", "gear")), regex = "a single string")
+  expect_error(data_expand(d, expand = "geas"), regex = "The column provided")
+  d$carb[3] <- NA
+  expect_error(data_expand(d, "carb"), regex = "missing values")
+})

From b83e2d85b199a1ba243dc2ae1066fc43b978c095 Mon Sep 17 00:00:00 2001
From: Daniel <mail@danielluedecke.de>
Date: Wed, 20 Mar 2024 19:35:39 +0100
Subject: [PATCH 04/16] news, desc

---
 DESCRIPTION | 2 +-
 NEWS.md     | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index a3b81a578..a51982765 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: datawizard
 Title: Easy Data Wrangling and Statistical Transformations
-Version: 0.9.1.7
+Version: 0.9.1.8
 Authors@R: c(
     person("Indrajeet", "Patil", , "patilindrajeet.science@gmail.com", role = "aut",
            comment = c(ORCID = "0000-0003-1995-6531", Twitter = "@patilindrajeets")),
diff --git a/NEWS.md b/NEWS.md
index d78e81ee9..2e806b9ac 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -12,6 +12,9 @@ NEW FUNCTIONS
 
 * `data_summary()`, to compute summary statistics of (grouped) data frames.
 
+* `data_expand()`, to expand a data frame by replicating rows based on another
+  variable that contains the counts of replications per row.
+
 CHANGES
 
 * `data_modify()` gets three new arguments, `.at`, `.if` and `.modify`, to modify

From 49b4527c0b1380eae92792f64df59ad38d795b03 Mon Sep 17 00:00:00 2001
From: Daniel <mail@danielluedecke.de>
Date: Fri, 22 Mar 2024 15:58:19 +0100
Subject: [PATCH 05/16] Update R/data_expand.R

Co-authored-by: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com>
---
 R/data_expand.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/data_expand.R b/R/data_expand.R
index 18fe0545e..f3b1bf72d 100644
--- a/R/data_expand.R
+++ b/R/data_expand.R
@@ -8,7 +8,7 @@
 #' @param data A data frame.
 #' @param expand The name of the column that contains the counts of replications
 #' for each row.
-#' @param remove_na Logical, if `TRUE`, missing values (`NA`) in the column
+#' @param remove_na Logical. If `TRUE`, missing values in the column
 #' provided in `expand` are removed from the data frame. If `FALSE` and `expand`
 #' contains missing values, the function will throw an error.
 #' @param ... Currently not used.

From 500bdb956786ff6be67ec55048cd823abdce10ff Mon Sep 17 00:00:00 2001
From: Daniel <mail@danielluedecke.de>
Date: Fri, 22 Mar 2024 15:58:31 +0100
Subject: [PATCH 06/16] Update R/data_expand.R

Co-authored-by: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com>
---
 R/data_expand.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/data_expand.R b/R/data_expand.R
index f3b1bf72d..36d22a148 100644
--- a/R/data_expand.R
+++ b/R/data_expand.R
@@ -14,7 +14,7 @@
 #' @param ... Currently not used.
 #' @inheritParams find_columns
 #'
-#' @return `data`, with each row replicated as many times as defined in `expand`.
+#' @return A dataframe with each row replicated as many times as defined in `expand`.
 #'
 #' @examples
 #' data(mtcars)

From bb14d432a1d16822144b032e0f2deee22a2c79e9 Mon Sep 17 00:00:00 2001
From: Daniel <mail@danielluedecke.de>
Date: Fri, 22 Mar 2024 15:58:46 +0100
Subject: [PATCH 07/16] Update R/data_expand.R

Co-authored-by: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com>
---
 R/data_expand.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/data_expand.R b/R/data_expand.R
index 36d22a148..c83389124 100644
--- a/R/data_expand.R
+++ b/R/data_expand.R
@@ -73,7 +73,7 @@ data_expand <- function(data,
   if (!remove_na && anyNA(replicates)) {
     insight::format_error(
       "The column provided in `expand` contains missing values, but `remove_na` is set to `FALSE`.",
-      "Please set `remove_na` to `TRUE` or remove the missing values from the data frame."
+      "Please set `remove_na` to `TRUE` or remove the missing values from the `expand` variable."
     )
   }
 

From 623b626b4a883272937ceba3a4a8f70f2e465be9 Mon Sep 17 00:00:00 2001
From: Daniel <mail@danielluedecke.de>
Date: Fri, 22 Mar 2024 15:58:55 +0100
Subject: [PATCH 08/16] Update tests/testthat/test-data_expand.R

Co-authored-by: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com>
---
 tests/testthat/test-data_expand.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/testthat/test-data_expand.R b/tests/testthat/test-data_expand.R
index 9bb5b8196..330acb20b 100644
--- a/tests/testthat/test-data_expand.R
+++ b/tests/testthat/test-data_expand.R
@@ -26,7 +26,7 @@ test_that("data_expand: simple use case", {
 
 test_that("data_expand: errors", {
   data(mtcars)
-  d <- as.data.frame(head(mtcars))
+  d <- head(mtcars)
   expect_error(data_expand(d), regex = "No column")
   expect_error(data_expand(d, expand = c("mpg", "gear")), regex = "a single string")
   expect_error(data_expand(d, expand = "geas"), regex = "The column provided")

From ec881c41f932f80bd9fb926e7b8671522d712b8a Mon Sep 17 00:00:00 2001
From: Daniel <mail@danielluedecke.de>
Date: Fri, 22 Mar 2024 15:59:06 +0100
Subject: [PATCH 09/16] Update tests/testthat/test-data_expand.R

Co-authored-by: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com>
---
 tests/testthat/test-data_expand.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/testthat/test-data_expand.R b/tests/testthat/test-data_expand.R
index 330acb20b..8122d8ee9 100644
--- a/tests/testthat/test-data_expand.R
+++ b/tests/testthat/test-data_expand.R
@@ -1,6 +1,6 @@
 test_that("data_expand: simple use case", {
   data(mtcars)
-  d <- as.data.frame(head(mtcars))
+  d <- head(mtcars)
   out <- data_expand(d, "carb")
   expect_identical(dim(out), c(13L, 10L))
   expect_identical(out$disp, c(160, 160, 160, 160, 160, 160, 160, 160, 108, 258, 360, 360, 225))

From a9f0ec6c74a7ae286b5809b37c7499b890a30d21 Mon Sep 17 00:00:00 2001
From: Daniel <mail@danielluedecke.de>
Date: Fri, 22 Mar 2024 22:39:32 +0100
Subject: [PATCH 10/16] rename

---
 NEWS.md                                       |  2 +-
 R/{data_expand.R => data_replicate.R}         | 10 ++++------
 _pkgdown.yaml                                 |  2 +-
 ...st-data_expand.R => test-data_replicate.R} | 20 +++++++++----------
 4 files changed, 16 insertions(+), 18 deletions(-)
 rename R/{data_expand.R => data_replicate.R} (91%)
 rename tests/testthat/{test-data_expand.R => test-data_replicate.R} (62%)

diff --git a/NEWS.md b/NEWS.md
index 2e806b9ac..6fa9b1219 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -12,7 +12,7 @@ NEW FUNCTIONS
 
 * `data_summary()`, to compute summary statistics of (grouped) data frames.
 
-* `data_expand()`, to expand a data frame by replicating rows based on another
+* `data_replicate()`, to expand a data frame by replicating rows based on another
   variable that contains the counts of replications per row.
 
 CHANGES
diff --git a/R/data_expand.R b/R/data_replicate.R
similarity index 91%
rename from R/data_expand.R
rename to R/data_replicate.R
index c83389124..8e59cb9e3 100644
--- a/R/data_expand.R
+++ b/R/data_replicate.R
@@ -1,5 +1,5 @@
 #' @title Expand (i.e. replicate rows) a data frame
-#' @name data_expand
+#' @name data_replicate
 #'
 #' @description
 #' Expand a data frame by replicating rows based on another variable that
@@ -18,9 +18,9 @@
 #'
 #' @examples
 #' data(mtcars)
-#' data_expand(head(mtcars), "carb")
+#' data_replicate(head(mtcars), "carb")
 #' @export
-data_expand <- function(data,
+data_replicate <- function(data,
                         expand = NULL,
                         select = NULL,
                         exclude = NULL,
@@ -82,7 +82,5 @@ data_expand <- function(data,
   replicates <- replicates[!is.na(replicates)]
 
   # fin
-  as.data.frame(do.call(cbind, lapply(data[select], function(variable) {
-    unlist(Map(rep, variable, replicates), use.names = FALSE)
-  })))
+  as.data.frame(do.call(cbind, lapply(data[select], rep.int, times = replicates)))
 }
diff --git a/_pkgdown.yaml b/_pkgdown.yaml
index 3dfeeefb9..65bae30c8 100644
--- a/_pkgdown.yaml
+++ b/_pkgdown.yaml
@@ -20,7 +20,7 @@ reference:
       - data_partition
       - data_rotate
       - data_group
-      - data_expand
+      - data_replicate
       - data_duplicated
       - data_unique
 
diff --git a/tests/testthat/test-data_expand.R b/tests/testthat/test-data_replicate.R
similarity index 62%
rename from tests/testthat/test-data_expand.R
rename to tests/testthat/test-data_replicate.R
index 8122d8ee9..17be2dd89 100644
--- a/tests/testthat/test-data_expand.R
+++ b/tests/testthat/test-data_replicate.R
@@ -1,35 +1,35 @@
-test_that("data_expand: simple use case", {
+test_that("data_replicate: simple use case", {
   data(mtcars)
   d <- head(mtcars)
-  out <- data_expand(d, "carb")
+  out <- data_replicate(d, "carb")
   expect_identical(dim(out), c(13L, 10L))
   expect_identical(out$disp, c(160, 160, 160, 160, 160, 160, 160, 160, 108, 258, 360, 360, 225))
   expect_named(out, c("mpg", "cyl", "disp", "hp", "drat", "wt", "qsec", "vs", "am", "gear"))
 
   d$mpg[5] <- NA
-  out <- data_expand(d, "carb")
+  out <- data_replicate(d, "carb")
   expect_identical(dim(out), c(13L, 10L))
   expect_identical(out$mpg, c(21, 21, 21, 21, 21, 21, 21, 21, 22.8, 21.4, NA, NA, 18.1))
   expect_named(out, c("mpg", "cyl", "disp", "hp", "drat", "wt", "qsec", "vs", "am", "gear"))
 
   d$carb[3] <- NA
-  out <- data_expand(d, "carb", remove_na = TRUE)
+  out <- data_replicate(d, "carb", remove_na = TRUE)
   expect_identical(dim(out), c(12L, 10L))
   expect_identical(out$mpg, c(21, 21, 21, 21, 21, 21, 21, 21, 21.4, NA, NA, 18.1))
   expect_named(out, c("mpg", "cyl", "disp", "hp", "drat", "wt", "qsec", "vs", "am", "gear"))
 
-  out <- data_expand(d, "carb", select = c("disp", "hp"), remove_na = TRUE)
+  out <- data_replicate(d, "carb", select = c("disp", "hp"), remove_na = TRUE)
   expect_identical(dim(out), c(12L, 2L))
   expect_identical(out$disp, c(160, 160, 160, 160, 160, 160, 160, 160, 258, 360, 360, 225))
   expect_named(out, c("disp", "hp"))
 })
 
-test_that("data_expand: errors", {
+test_that("data_replicate: errors", {
   data(mtcars)
   d <- head(mtcars)
-  expect_error(data_expand(d), regex = "No column")
-  expect_error(data_expand(d, expand = c("mpg", "gear")), regex = "a single string")
-  expect_error(data_expand(d, expand = "geas"), regex = "The column provided")
+  expect_error(data_replicate(d), regex = "No column")
+  expect_error(data_replicate(d, expand = c("mpg", "gear")), regex = "a single string")
+  expect_error(data_replicate(d, expand = "geas"), regex = "The column provided")
   d$carb[3] <- NA
-  expect_error(data_expand(d, "carb"), regex = "missing values")
+  expect_error(data_replicate(d, "carb"), regex = "missing values")
 })

From afc090f9b4fb30b6530e80b48737b62187a5ec69 Mon Sep 17 00:00:00 2001
From: Daniel <mail@danielluedecke.de>
Date: Fri, 22 Mar 2024 22:53:01 +0100
Subject: [PATCH 11/16] update namespace and RD

---
 NAMESPACE                                 |  2 +-
 man/{data_expand.Rd => data_replicate.Rd} | 14 +++++++-------
 2 files changed, 8 insertions(+), 8 deletions(-)
 rename man/{data_expand.Rd => data_replicate.Rd} (92%)

diff --git a/NAMESPACE b/NAMESPACE
index a0a9d4788..eea4e22a4 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -233,7 +233,6 @@ export(data_adjust)
 export(data_arrange)
 export(data_codebook)
 export(data_duplicated)
-export(data_expand)
 export(data_extract)
 export(data_filter)
 export(data_find)
@@ -250,6 +249,7 @@ export(data_remove)
 export(data_rename)
 export(data_rename_rows)
 export(data_reorder)
+export(data_replicate)
 export(data_restoretype)
 export(data_rotate)
 export(data_seek)
diff --git a/man/data_expand.Rd b/man/data_replicate.Rd
similarity index 92%
rename from man/data_expand.Rd
rename to man/data_replicate.Rd
index 0340b89d9..04429d0f1 100644
--- a/man/data_expand.Rd
+++ b/man/data_replicate.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/data_expand.R
-\name{data_expand}
-\alias{data_expand}
+% Please edit documentation in R/data_replicate.R
+\name{data_replicate}
+\alias{data_replicate}
 \title{Expand (i.e. replicate rows) a data frame}
 \usage{
-data_expand(
+data_replicate(
   data,
   expand = NULL,
   select = NULL,
@@ -58,7 +58,7 @@ return \code{"Species"}.}
 from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default),
 excludes no columns.}
 
-\item{remove_na}{Logical, if \code{TRUE}, missing values (\code{NA}) in the column
+\item{remove_na}{Logical. If \code{TRUE}, missing values in the column
 provided in \code{expand} are removed from the data frame. If \code{FALSE} and \code{expand}
 contains missing values, the function will throw an error.}
 
@@ -80,7 +80,7 @@ functions (see 'Details'), this argument may be used as workaround.}
 \item{...}{Currently not used.}
 }
 \value{
-\code{data}, with each row replicated as many times as defined in \code{expand}.
+A dataframe with each row replicated as many times as defined in \code{expand}.
 }
 \description{
 Expand a data frame by replicating rows based on another variable that
@@ -88,5 +88,5 @@ contains the counts of replications per row.
 }
 \examples{
 data(mtcars)
-data_expand(head(mtcars), "carb")
+data_replicate(head(mtcars), "carb")
 }

From 074c807aa701dc034466fe77ab23c5a9e5d7ab83 Mon Sep 17 00:00:00 2001
From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com>
Date: Sat, 23 Mar 2024 10:29:54 +0100
Subject: [PATCH 12/16] make styler happy [skip ci]

---
 R/data_replicate.R | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/R/data_replicate.R b/R/data_replicate.R
index 8e59cb9e3..0cf54329a 100644
--- a/R/data_replicate.R
+++ b/R/data_replicate.R
@@ -21,14 +21,14 @@
 #' data_replicate(head(mtcars), "carb")
 #' @export
 data_replicate <- function(data,
-                        expand = NULL,
-                        select = NULL,
-                        exclude = NULL,
-                        remove_na = FALSE,
-                        ignore_case = FALSE,
-                        verbose = TRUE,
-                        regex = FALSE,
-                        ...) {
+                           expand = NULL,
+                           select = NULL,
+                           exclude = NULL,
+                           remove_na = FALSE,
+                           ignore_case = FALSE,
+                           verbose = TRUE,
+                           regex = FALSE,
+                           ...) {
   # we need a name for the new column
   if (is.null(expand)) {
     insight::format_error(

From 5de51d9c0c5f0a452d9304b4e0a4447e7e14adfb Mon Sep 17 00:00:00 2001
From: Daniel <mail@danielluedecke.de>
Date: Sat, 23 Mar 2024 11:09:23 +0100
Subject: [PATCH 13/16] check for integer

---
 R/data_replicate.R                   | 28 +++++++++++++++++++++++++++-
 man/data_replicate.Rd                |  3 ++-
 tests/testthat/test-data_replicate.R |  6 ++++++
 3 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/R/data_replicate.R b/R/data_replicate.R
index 0cf54329a..189b71f25 100644
--- a/R/data_replicate.R
+++ b/R/data_replicate.R
@@ -7,7 +7,8 @@
 #'
 #' @param data A data frame.
 #' @param expand The name of the column that contains the counts of replications
-#' for each row.
+#' for each row. Can also be a numeric value, indicating the position of that
+#' column. Note that the variable indicated by `expand` must be an integer vector.
 #' @param remove_na Logical. If `TRUE`, missing values in the column
 #' provided in `expand` are removed from the data frame. If `FALSE` and `expand`
 #' contains missing values, the function will throw an error.
@@ -43,6 +44,11 @@ data_replicate <- function(data,
     )
   }
 
+  # check if numerics, and if so, use column name
+  if (is.numeric(expand)) {
+    expand <- colnames(data)[expand]
+  }
+
   # check if in data
   if (!expand %in% colnames(data)) {
     insight::format_error(
@@ -51,6 +57,13 @@ data_replicate <- function(data,
     )
   }
 
+  # check that "expand" is integer
+  if (!.is_integer(data[[expand]])) {
+    insight::format_error(
+      "The column provided in `expand` is not of type integer. Please provide a column that contains integer values." # nolint
+    )
+  }
+
   # evaluate select/exclude, may be select-helpers
   select <- .select_nse(select,
     data,
@@ -84,3 +97,16 @@ data_replicate <- function(data,
   # fin
   as.data.frame(do.call(cbind, lapply(data[select], rep.int, times = replicates)))
 }
+
+
+.is_integer <- function(x) {
+  tryCatch(
+    if (is.infinite(x)) {
+      FALSE
+    } else {
+      all(x %% 1 == 0)
+    },
+    warning = function(w) is.integer(x),
+    error = function(e) FALSE
+  )
+}
diff --git a/man/data_replicate.Rd b/man/data_replicate.Rd
index 04429d0f1..4c152b371 100644
--- a/man/data_replicate.Rd
+++ b/man/data_replicate.Rd
@@ -20,7 +20,8 @@ data_replicate(
 \item{data}{A data frame.}
 
 \item{expand}{The name of the column that contains the counts of replications
-for each row.}
+for each row. Can also be a numeric value, indicating the position of that
+column. Note that the variable indicated by \code{expand} must be an integer vector.}
 
 \item{select}{Variables that will be included when performing the required
 tasks. Can be either
diff --git a/tests/testthat/test-data_replicate.R b/tests/testthat/test-data_replicate.R
index 17be2dd89..8967a67d2 100644
--- a/tests/testthat/test-data_replicate.R
+++ b/tests/testthat/test-data_replicate.R
@@ -6,6 +6,11 @@ test_that("data_replicate: simple use case", {
   expect_identical(out$disp, c(160, 160, 160, 160, 160, 160, 160, 160, 108, 258, 360, 360, 225))
   expect_named(out, c("mpg", "cyl", "disp", "hp", "drat", "wt", "qsec", "vs", "am", "gear"))
 
+  out <- data_replicate(d, 11)
+  expect_identical(dim(out), c(13L, 10L))
+  expect_identical(out$disp, c(160, 160, 160, 160, 160, 160, 160, 160, 108, 258, 360, 360, 225))
+  expect_named(out, c("mpg", "cyl", "disp", "hp", "drat", "wt", "qsec", "vs", "am", "gear"))
+
   d$mpg[5] <- NA
   out <- data_replicate(d, "carb")
   expect_identical(dim(out), c(13L, 10L))
@@ -30,6 +35,7 @@ test_that("data_replicate: errors", {
   expect_error(data_replicate(d), regex = "No column")
   expect_error(data_replicate(d, expand = c("mpg", "gear")), regex = "a single string")
   expect_error(data_replicate(d, expand = "geas"), regex = "The column provided")
+  expect_error(data_replicate(d, expand = "qsec"), regex = "The column provided")
   d$carb[3] <- NA
   expect_error(data_replicate(d, "carb"), regex = "missing values")
 })

From c2ccd129516dcb9c724d264e194e16cf1da63c89 Mon Sep 17 00:00:00 2001
From: Daniel <mail@danielluedecke.de>
Date: Sat, 23 Mar 2024 11:16:32 +0100
Subject: [PATCH 14/16] fix

---
 R/data_replicate.R                   | 13 ++++++++-----
 tests/testthat/test-data_replicate.R | 13 +++++++++++++
 2 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/R/data_replicate.R b/R/data_replicate.R
index 189b71f25..b61f2de6f 100644
--- a/R/data_replicate.R
+++ b/R/data_replicate.R
@@ -57,6 +57,13 @@ data_replicate <- function(data,
     )
   }
 
+  # check that "expand" contains no Inf
+  if (any(is.infinite(data[[expand]]))) {
+    insight::format_error(
+      "The column provided in `expand` contains infinite values. Please provide a column that does not contain infinite values." # nolint
+    )
+  }
+
   # check that "expand" is integer
   if (!.is_integer(data[[expand]])) {
     insight::format_error(
@@ -101,11 +108,7 @@ data_replicate <- function(data,
 
 .is_integer <- function(x) {
   tryCatch(
-    if (is.infinite(x)) {
-      FALSE
-    } else {
-      all(x %% 1 == 0)
-    },
+    all(x %% 1 == 0),
     warning = function(w) is.integer(x),
     error = function(e) FALSE
   )
diff --git a/tests/testthat/test-data_replicate.R b/tests/testthat/test-data_replicate.R
index 8967a67d2..92c514b5d 100644
--- a/tests/testthat/test-data_replicate.R
+++ b/tests/testthat/test-data_replicate.R
@@ -27,8 +27,18 @@ test_that("data_replicate: simple use case", {
   expect_identical(dim(out), c(12L, 2L))
   expect_identical(out$disp, c(160, 160, 160, 160, 160, 160, 160, 160, 258, 360, 360, 225))
   expect_named(out, c("disp", "hp"))
+
+  d <- data.frame(
+    a = c("a", "b", "c"),
+    b = 1:3,
+    rep = c(3, 2, 4),
+    stringsAsFactors = FALSE
+  )
+  out <- data_replicate(d, "rep")
+  expect_identical(out$a, c("a", "a", "a", "b", "b", "c", "c", "c", "c"))
 })
 
+
 test_that("data_replicate: errors", {
   data(mtcars)
   d <- head(mtcars)
@@ -38,4 +48,7 @@ test_that("data_replicate: errors", {
   expect_error(data_replicate(d, expand = "qsec"), regex = "The column provided")
   d$carb[3] <- NA
   expect_error(data_replicate(d, "carb"), regex = "missing values")
+  d <- head(mtcars)
+  d$carb[3] <- Inf
+  expect_error(data_replicate(d, "carb"), regex = "infinite values")
 })

From 0f87577d646f051eed3d5bee6ad405157019af88 Mon Sep 17 00:00:00 2001
From: Daniel <mail@danielluedecke.de>
Date: Sat, 23 Mar 2024 11:25:49 +0100
Subject: [PATCH 15/16] Update data_replicate.R

---
 R/data_replicate.R | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/R/data_replicate.R b/R/data_replicate.R
index b61f2de6f..a31afcbe8 100644
--- a/R/data_replicate.R
+++ b/R/data_replicate.R
@@ -106,7 +106,10 @@ data_replicate <- function(data,
 }
 
 
-.is_integer <- function(x) {
+.is_integer <- function(x, remove_na = TRUE) {
+  if (remove_na) {
+    x <- x[!is.na(x)]
+  }
   tryCatch(
     all(x %% 1 == 0),
     warning = function(w) is.integer(x),

From caf70e1fff2e08ba03ceb3d66ae94cb02fd2033c Mon Sep 17 00:00:00 2001
From: Daniel <mail@danielluedecke.de>
Date: Sat, 23 Mar 2024 14:01:02 +0100
Subject: [PATCH 16/16] Update R/data_replicate.R

Co-authored-by: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com>
---
 R/data_replicate.R | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/R/data_replicate.R b/R/data_replicate.R
index a31afcbe8..4ea6f998e 100644
--- a/R/data_replicate.R
+++ b/R/data_replicate.R
@@ -106,6 +106,8 @@ data_replicate <- function(data,
 }
 
 
+# is.integer(c(1, 2)) -> FALSE
+# all(c(1, 2) %% 1 == 0) -> TRUE
 .is_integer <- function(x, remove_na = TRUE) {
   if (remove_na) {
     x <- x[!is.na(x)]