diff --git a/.Rbuildignore b/.Rbuildignore index d245bf8c3..65a0a7f67 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -48,3 +48,5 @@ references.bib ^CRAN-SUBMISSION$ docs ^.dev$ +^vignettes/s. +^vignettes/t. diff --git a/DESCRIPTION b/DESCRIPTION index 08e9c4c08..807894879 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,30 +1,30 @@ Type: Package Package: datawizard Title: Easy Data Wrangling and Statistical Transformations -Version: 0.11.0.2 +Version: 0.13.0.13 Authors@R: c( person("Indrajeet", "Patil", , "patilindrajeet.science@gmail.com", role = "aut", - comment = c(ORCID = "0000-0003-1995-6531", Twitter = "@patilindrajeets")), + comment = c(ORCID = "0000-0003-1995-6531")), person("Etienne", "Bacher", , "etienne.bacher@protonmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-9271-5075")), person("Dominique", "Makowski", , "dom.makowski@gmail.com", role = "aut", - comment = c(ORCID = "0000-0001-5375-9967", Twitter = "@Dom_Makowski")), + comment = c(ORCID = "0000-0001-5375-9967")), person("Daniel", "Lüdecke", , "d.luedecke@uke.de", role = "aut", - comment = c(ORCID = "0000-0002-8895-3206", Twitter = "@strengejacke")), + comment = c(ORCID = "0000-0002-8895-3206")), person("Mattan S.", "Ben-Shachar", , "matanshm@post.bgu.ac.il", role = "aut", comment = c(ORCID = "0000-0002-4287-4801")), person("Brenton M.", "Wiernik", , "brenton@wiernik.org", role = "aut", - comment = c(ORCID = "0000-0001-9560-6336", Twitter = "@bmwiernik")), + comment = c(ORCID = "0000-0001-9560-6336")), person("Rémi", "Thériault", , "remi.theriault@mail.mcgill.ca", role = "ctb", - comment = c(ORCID = "0000-0003-4315-6788", Twitter = "@rempsyc")), + comment = c(ORCID = "0000-0003-4315-6788")), person("Thomas J.", "Faulkenberry", , "faulkenberry@tarleton.edu", role = "rev"), person("Robert", "Garrett", , "rcg4@illinois.edu", role = "rev") ) Maintainer: Etienne Bacher -Description: A lightweight package to assist in key steps involved in any data - analysis workflow: (1) wrangling 
the raw data to get it in the needed form, - (2) applying preprocessing steps and statistical transformations, and - (3) compute statistical summaries of data properties and distributions. +Description: A lightweight package to assist in key steps involved in any data + analysis workflow: (1) wrangling the raw data to get it in the needed form, + (2) applying preprocessing steps and statistical transformations, and + (3) compute statistical summaries of data properties and distributions. It is also the data wrangling backend for packages in 'easystats' ecosystem. References: Patil et al. (2022) . License: MIT + file LICENSE @@ -33,10 +33,10 @@ BugReports: https://github.com/easystats/datawizard/issues Depends: R (>= 4.0) Imports: - insight (>= 0.20.0), + insight (>= 0.20.5), stats, utils -Suggests: +Suggests: bayestestR, boot, brms, @@ -49,7 +49,6 @@ Suggests: ggplot2 (>= 3.5.0), gt, haven, - htmltools, httr, knitr, lme4, @@ -68,12 +67,13 @@ Suggests: tibble, tidyr, withr -VignetteBuilder: +VignetteBuilder: knitr Encoding: UTF-8 Language: en-US Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 Config/testthat/edition: 3 Config/testthat/parallel: true Config/Needs/website: easystats/easystatstemplate +Remotes: easystats/insight diff --git a/NAMESPACE b/NAMESPACE index 5926d19ab..7e97817b9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,7 @@ # Generated by roxygen2: do not edit by hand +S3method(as.data.frame,datawizard_crosstabs) +S3method(as.data.frame,datawizard_tables) S3method(as.double,parameters_kurtosis) S3method(as.double,parameters_skewness) S3method(as.double,parameters_smoothness) @@ -69,9 +71,9 @@ S3method(describe_distribution,grouped_df) S3method(describe_distribution,list) S3method(describe_distribution,numeric) S3method(format,data_codebook) +S3method(format,datawizard_crosstab) +S3method(format,datawizard_table) S3method(format,dw_data_peek) -S3method(format,dw_data_tabulate) -S3method(format,dw_data_xtabulate) 
S3method(format,dw_groupmeans) S3method(format,parameters_distribution) S3method(kurtosis,data.frame) @@ -93,12 +95,12 @@ S3method(normalize,numeric) S3method(plot,visualisation_recipe) S3method(print,data_codebook) S3method(print,data_seek) +S3method(print,datawizard_crosstab) +S3method(print,datawizard_crosstabs) +S3method(print,datawizard_table) +S3method(print,datawizard_tables) S3method(print,dw_data_peek) S3method(print,dw_data_summary) -S3method(print,dw_data_tabulate) -S3method(print,dw_data_tabulates) -S3method(print,dw_data_xtabulate) -S3method(print,dw_data_xtabulates) S3method(print,dw_groupmeans) S3method(print,dw_groupmeans_list) S3method(print,dw_transformer) @@ -107,16 +109,16 @@ S3method(print,parameters_kurtosis) S3method(print,parameters_skewness) S3method(print,visualisation_recipe) S3method(print_html,data_codebook) +S3method(print_html,datawizard_crosstab) +S3method(print_html,datawizard_crosstabs) +S3method(print_html,datawizard_table) +S3method(print_html,datawizard_tables) S3method(print_html,dw_data_peek) -S3method(print_html,dw_data_tabulate) -S3method(print_html,dw_data_tabulates) -S3method(print_html,dw_data_xtabulate) -S3method(print_html,dw_data_xtabulates) S3method(print_md,data_codebook) +S3method(print_md,datawizard_crosstab) +S3method(print_md,datawizard_table) +S3method(print_md,datawizard_tables) S3method(print_md,dw_data_peek) -S3method(print_md,dw_data_tabulate) -S3method(print_md,dw_data_tabulates) -S3method(print_md,dw_data_xtabulate) S3method(ranktransform,data.frame) S3method(ranktransform,factor) S3method(ranktransform,grouped_df) @@ -218,7 +220,6 @@ export(assign_labels) export(categorize) export(center) export(centre) -export(change_code) export(change_scale) export(coef_var) export(coerce_to_numeric) @@ -235,7 +236,6 @@ export(data_codebook) export(data_duplicated) export(data_extract) export(data_filter) -export(data_find) export(data_group) export(data_join) export(data_match) @@ -274,8 +274,6 @@ 
export(empty_columns) export(empty_rows) export(extract_column_names) export(find_columns) -export(format_text) -export(get_columns) export(kurtosis) export(labels_to_levels) export(mean_sd) @@ -298,7 +296,9 @@ export(reshape_longer) export(reshape_wider) export(reverse) export(reverse_scale) +export(row_count) export(row_means) +export(row_sums) export(row_to_colnames) export(rowid_as_column) export(rownames_as_column) diff --git a/NEWS.md b/NEWS.md index 1e49e91a4..663efa310 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,10 +1,115 @@ -# datawizard 0.11.0.1 +# datawizard (development) -## Changes +BREAKING CHANGES + +* Argument `drop_na` in `data_match()` is deprecated now. Please use `remove_na` + instead. + +CHANGES + +* The `select` argument, which is available in different functions to select + variables, can now also be a character vector with quoted variable names, + including a colon to indicate a range of several variables (e.g. `"cyl:gear"`). + +* New function `row_sums()`, to calculate row sums (optionally with minimum + amount of valid values), as complement to `row_means()`. + +* New function `row_count()`, to count specific values row-wise. + +* `data_read()` no longer shows warning about forthcoming breaking changes + in upstream packages when reading `.RData` files. + +* `data_modify()` now recognizes `n()`, for example to create an index for data groups + with `1:n()` (#535). + +BUG FIXES + +* `describe_distribution()` no longer errors if the sample was too sparse to compute + CIs. Instead, it warns the user and returns `NA` (#550). + +* `data_read()` preserves variable types when importing files from `rds` or + `rdata` format (#558). + +# datawizard 0.13.0 + +BREAKING CHANGES + +* `data_rename()` now errors when the `replacement` argument contains `NA` values + or empty strings (#539). + +* Removed deprecated functions `get_columns()`, `data_find()`, `format_text()` (#546). + +* Removed deprecated arguments `group` and `na.rm` in multiple functions. 
Use `by` and `remove_na` instead (#546). + +* The default value for the argument `dummy_factors` in `to_numeric()` has + changed from `TRUE` to `FALSE` (#544). + +CHANGES + +* The `pattern` argument in `data_rename()` can also be a named vector. In this + case, names are used as values for the `replacement` argument (i.e. `pattern` + can be a character vector using ` = ""`). + +* `categorize()` gains a new `breaks` argument, to decide whether breaks are + inclusive or exclusive (#548). + +* The `labels` argument in `categorize()` gets two new options, `"range"` and + `"observed"`, to use the range of categorized values as labels (i.e. factor + levels) (#548). + +* Minor additions to `reshape_ci()` to work with forthcoming changes in the + `{bayestestR}` package. + +# datawizard 0.12.3 + +CHANGES + +* `demean()` (and `degroup()`) now also work for nested designs, if argument + `nested = TRUE` and `by` specifies more than one variable (#533). + +* Vignettes are no longer provided in the package, they are now only available + on the website. There is only one "Overview" vignette available in the package, + it contains links to the other vignettes on the website. This is because there + are CRAN errors occurring when building vignettes on macOS and we couldn't + determine the cause after multiple patch releases (#534). + +# datawizard 0.12.2 + +* Remove `htmltools` from `Suggests` in an attempt of fixing an error in CRAN + checks due to failures to build a vignette (#528). + +# datawizard 0.12.1 + +This is a patch release to fix one error on CRAN checks occurring because of a +missing package namespace in one of the vignettes. + +# datawizard 0.12.0 + +BREAKING CHANGES + +* The argument `include_na` in `data_tabulate()` and `data_summary()` has been + renamed into `remove_na`. Consequently, to mimic former behaviour, `FALSE` and + `TRUE` need to be switched (i.e. `remove_na = TRUE` is equivalent to the former + `include_na = FALSE`). 
+ +* Class names for objects returned by `data_tabulate()` have been changed to + `datawizard_table` and `datawizard_crosstable` (resp. the plural forms, + `*_tables`), to provide a clearer and more consistent naming scheme. + +CHANGES * `data_select()` can directly rename selected variables when a named vector is provided in `select`, e.g. `data_select(mtcars, c(new1 = "mpg", new2 = "cyl"))`. +* `data_tabulate()` gains an `as.data.frame()` method, to return the frequency + table as a data frame. The structure of the returned object is a nested data + frame, where the first column contains name of the variable for which + frequencies were calculated, and the second column contains the frequency table. + +* `demean()` (and `degroup()`) now also work for cross-classified designs, or + more generally, for data with multiple grouping or cluster variables (i.e. + `by` can now specify more than one variable). + # datawizard 0.11.0 BREAKING CHANGES @@ -43,8 +148,8 @@ BREAKING CHANGES * The following arguments were deprecated in 0.5.0 and are now removed: - * in `data_to_wide()`: `colnames_from`, `rows_from`, `sep` - * in `data_to_long()`: `colnames_to` + * in `data_to_wide()`: `colnames_from`, `rows_from`, `sep` + * in `data_to_long()`: `colnames_to` * in `data_partition()`: `training_proportion` NEW FUNCTIONS @@ -63,7 +168,7 @@ CHANGES argument, to compute weighted frequency tables. `include_na` allows to include or omit missing values from the table. Furthermore, a `by` argument was added, to compute crosstables (#479, #481). - + # datawizard 0.9.1 CHANGES @@ -114,7 +219,7 @@ CHANGES * `unnormalize()` and `unstandardize()` now work with grouped data (#415). -* `unnormalize()` now errors instead of emitting a warning if it doesn't have the +* `unnormalize()` now errors instead of emitting a warning if it doesn't have the necessary info (#415). BUG FIXES @@ -137,7 +242,7 @@ BUG FIXES * Fixed issue in `data_filter()` where functions containing a `=` (e.g. 
when naming arguments, like `grepl(pattern, x = a)`) were mistakenly seen as - faulty syntax. + faulty syntax. * Fixed issue in `empty_column()` for strings with invalid multibyte strings. For such data frames or files, `empty_column()` or `data_read()` no longer @@ -174,14 +279,14 @@ CHANGES NEW FUNCTIONS -* `rowid_as_column()` to complement `rownames_as_column()` (and to mimic - `tibble::rowid_to_column()`). Note that its behavior is different from +* `rowid_as_column()` to complement `rownames_as_column()` (and to mimic + `tibble::rowid_to_column()`). Note that its behavior is different from `tibble::rowid_to_column()` for grouped data. See the Details section in the docs. * `data_unite()`, to merge values of multiple variables into one new variable. -* `data_separate()`, as counterpart to `data_unite()`, to separate a single +* `data_separate()`, as counterpart to `data_unite()`, to separate a single variable into multiple new variables. * `data_modify()`, to create new variables, or modify or remove existing @@ -204,7 +309,7 @@ BUG FIXES * `center()` and `standardize()` did not work for grouped data frames (of class `grouped_df`) when `force = TRUE`. - + * The `data.frame` method of `describe_distribution()` returns `NULL` instead of an error if no valid variable were passed (for example a factor variable with `include_factors = FALSE`) (#421). @@ -232,12 +337,12 @@ BUG FIXES # datawizard 0.7.0 -BREAKING CHANGES +BREAKING CHANGES * In selection patterns, expressions like `-var1:var3` to exclude all variables between `var1` and `var3` are no longer accepted. The correct expression is `-(var1:var3)`. This is for 2 reasons: - + * to be consistent with the behavior for numerics (`-1:2` is not accepted but `-(1:2)` is); * to be consistent with `dplyr::select()`, which throws a warning and only @@ -249,8 +354,8 @@ NEW FUNCTIONS or more variables into a new variable. 
* `mean_sd()` and `median_mad()` for summarizing vectors to their mean (or - median) and a range of one SD (or MAD) above and below. - + median) and a range of one SD (or MAD) above and below. + * `data_write()` as counterpart to `data_read()`, to write data frames into CSV, SPSS, SAS, Stata files and many other file types. One advantage over existing functions to write data in other packages is that labelled (numeric) @@ -266,8 +371,8 @@ MINOR CHANGES * `data_rename()` gets a `verbose` argument. * `winsorize()` now errors if the threshold is incorrect (previously, it provided - a warning and returned the unchanged data). The argument `verbose` is now - useless but is kept for backward compatibility. The documentation now contains + a warning and returned the unchanged data). The argument `verbose` is now + useless but is kept for backward compatibility. The documentation now contains details about the valid values for `threshold` (#357). * In all functions that have arguments `select` and/or `exclude`, there is now one warning per misspelled variable. The previous behavior was to have only one @@ -288,7 +393,7 @@ BUG FIXES * Fix unexpected warning in `convert_na_to()` when `select` is a list (#352). * Fixed issue with correct labelling of numeric variables with more than nine unique values and associated value labels. - + # datawizard 0.6.5 @@ -320,7 +425,7 @@ NEW FUNCTIONS * `data_codebook()`: to generate codebooks of data frames. * New functions to deal with duplicates: `data_duplicated()` (keep all duplicates, - including the first occurrence) and `data_unique()` (returns the data, excluding + including the first occurrence) and `data_unique()` (returns the data, excluding all duplicates except one instance of each, based on the selected method). MINOR CHANGES @@ -330,15 +435,15 @@ MINOR CHANGES * The `include_bounds` argument in `normalize()` can now also be a numeric value, defining the limit to the upper and lower bound (i.e. the distance to 1 and 0). 
- -* `data_filter()` now works with grouped data. + +* `data_filter()` now works with grouped data. BUG FIXES * `data_read()` no longer prints message for empty columns when the data actually had no empty columns. - - * `data_to_wide()` now drops columns that are not in `id_cols` (if specified), + + * `data_to_wide()` now drops columns that are not in `id_cols` (if specified), `names_from`, or `values_from`. This is the behaviour observed in `tidyr::pivot_wider()`. # datawizard 0.6.3 @@ -770,4 +875,3 @@ NEW FUNCTIONS # datawizard 0.1.0 * First release. - diff --git a/R/categorize.R b/R/categorize.R index a6562ab68..9f8dd7505 100644 --- a/R/categorize.R +++ b/R/categorize.R @@ -31,10 +31,18 @@ #' for numeric variables, the minimum of the original input is preserved. For #' factors, the default minimum is `1`. For `split = "equal_range"`, the #' default minimum is always `1`, unless specified otherwise in `lowest`. +#' @param breaks Character, indicating whether breaks for categorizing data are +#' `"inclusive"` (values indicate the _upper_ bound of the _previous_ group or +#' interval) or `"exclusive"` (values indicate the _lower_ bound of the _next_ +#' group or interval to begin). Use `labels = "range"` to make this behaviour +#' easier to see. #' @param labels Character vector of value labels. If not `NULL`, `categorize()` #' will returns factors instead of numeric variables, with `labels` used -#' for labelling the factor levels. Can also be `"mean"` or `"median"` for a -#' factor with labels as the mean/median of each groups. +#' for labelling the factor levels. Can also be `"mean"`, `"median"`, +#' `"range"` or `"observed"` for a factor with labels as the mean/median, +#' the requested range (even if not all values of that range are present in +#' the data) or observed range (range of the actual recoded values) of each +#' group. See 'Examples'. #' @param append Logical or string. 
If `TRUE`, recoded or converted variables #' get new column names and are appended (column bind) to `x`, thus returning #' both the original and the recoded variables. The new columns get a suffix, @@ -53,7 +61,7 @@ #' #' # Splits and breaks (cut-off values) #' -#' Breaks are in general _exclusive_, this means that these values indicate +#' Breaks are by default _exclusive_, this means that these values indicate #' the lower bound of the next group or interval to begin. Take a simple #' example, a numeric variable with values from 1 to 9. The median would be 5, #' thus the first interval ranges from 1-4 and is recoded into 1, while 5-9 @@ -63,6 +71,9 @@ #' from 1 to 3 belong to the first interval and are recoded into 1 (because #' the next interval starts at 3.67), 4 to 6 into 2 and 7 to 9 into 3. #' +#' The opposite behaviour can be achieved using `breaks = "inclusive"`, in which +#' case +#' #' # Recoding into groups with equal size or range #' #' `split = "equal_length"` and `split = "equal_range"` try to divide the @@ -119,6 +130,13 @@ #' x <- sample(1:10, size = 30, replace = TRUE) #' categorize(x, "equal_length", n_groups = 3, labels = "mean") #' categorize(x, "equal_length", n_groups = 3, labels = "median") +#' +#' # cut numeric into groups with the requested range as a label name +#' # each category has the same range, and labels indicate this range +#' categorize(mtcars$mpg, "equal_length", n_groups = 5, labels = "range") +#' # in this example, each category has the same range, but labels only refer +#' # to the ranges of the actual values (present in the data) inside each group +#' categorize(mtcars$mpg, "equal_length", n_groups = 5, labels = "observed") #' @export categorize <- function(x, ...) { UseMethod("categorize") @@ -142,6 +160,7 @@ categorize.numeric <- function(x, n_groups = NULL, range = NULL, lowest = 1, + breaks = "exclusive", labels = NULL, verbose = TRUE, ...) 
{ @@ -152,6 +171,9 @@ categorize.numeric <- function(x, if (identical(split, "equal_length")) split <- "length" if (identical(split, "equal_range")) split <- "range" + # check for valid values + breaks <- match.arg(breaks, c("exclusive", "inclusive")) + # save original_x <- x @@ -169,9 +191,9 @@ categorize.numeric <- function(x, } if (is.numeric(split)) { - breaks <- split + category_splits <- split } else { - breaks <- switch(split, + category_splits <- switch(split, median = stats::median(x), mean = mean(x), length = n_groups, @@ -182,15 +204,18 @@ categorize.numeric <- function(x, } # complete ranges, including minimum and maximum - if (!identical(split, "length")) breaks <- unique(c(min(x), breaks, max(x))) + if (!identical(split, "length")) { + category_splits <- unique(c(min(x), category_splits, max(x))) + } # recode into groups out <- droplevels(cut( x, - breaks = breaks, + breaks = category_splits, include.lowest = TRUE, - right = FALSE + right = identical(breaks, "inclusive") )) + cut_result <- out levels(out) <- 1:nlevels(out) # fix lowest value, add back into original vector @@ -201,7 +226,7 @@ categorize.numeric <- function(x, original_x[!is.na(original_x)] <- out # turn into factor? - .original_x_to_factor(original_x, x, labels, out, verbose, ...) + .original_x_to_factor(original_x, x, cut_result, labels, out, verbose, ...) } @@ -223,6 +248,7 @@ categorize.data.frame <- function(x, n_groups = NULL, range = NULL, lowest = 1, + breaks = "exclusive", labels = NULL, append = FALSE, ignore_case = FALSE, @@ -260,6 +286,7 @@ categorize.data.frame <- function(x, n_groups = n_groups, range = range, lowest = lowest, + breaks = breaks, labels = labels, verbose = verbose, ... 
@@ -276,6 +303,7 @@ categorize.grouped_df <- function(x, n_groups = NULL, range = NULL, lowest = 1, + breaks = "exclusive", labels = NULL, append = FALSE, ignore_case = FALSE, @@ -319,6 +347,7 @@ categorize.grouped_df <- function(x, n_groups = n_groups, range = range, lowest = lowest, + breaks = breaks, labels = labels, select = select, exclude = exclude, @@ -375,20 +404,26 @@ categorize.grouped_df <- function(x, } -.original_x_to_factor <- function(original_x, x, labels, out, verbose, ...) { +.original_x_to_factor <- function(original_x, x, cut_result, labels, out, verbose, ...) { if (!is.null(labels)) { if (length(labels) == length(unique(out))) { original_x <- as.factor(original_x) levels(original_x) <- labels - } else if (length(labels) == 1 && labels %in% c("mean", "median")) { + } else if (length(labels) == 1 && labels %in% c("mean", "median", "range", "observed")) { original_x <- as.factor(original_x) no_na_x <- original_x[!is.na(original_x)] - if (labels == "mean") { - labels <- stats::aggregate(x, list(no_na_x), FUN = mean, na.rm = TRUE)$x - } else { - labels <- stats::aggregate(x, list(no_na_x), FUN = stats::median, na.rm = TRUE)$x - } - levels(original_x) <- insight::format_value(labels, ...) + out <- switch(labels, + mean = stats::aggregate(x, list(no_na_x), FUN = mean, na.rm = TRUE)$x, + median = stats::aggregate(x, list(no_na_x), FUN = stats::median, na.rm = TRUE)$x, + # labels basically like what "cut()" returns + range = levels(cut_result), + # range based on the values that are actually present in the data + { + temp <- stats::aggregate(x, list(no_na_x), FUN = range, na.rm = TRUE)$x + apply(temp, 1, function(i) paste0("(", paste(as.vector(i), collapse = "-"), ")")) + } + ) + levels(original_x) <- insight::format_value(out, ...) 
} else if (isTRUE(verbose)) { insight::format_warning( "Argument `labels` and levels of the recoded variable are not of the same length.", diff --git a/R/data_codebook.R b/R/data_codebook.R index 71e2e5828..5dffbf9c2 100644 --- a/R/data_codebook.R +++ b/R/data_codebook.R @@ -33,7 +33,8 @@ #' #' @note There are methods to `print()` the data frame in a nicer output, as #' well methods for printing in markdown or HTML format (`print_md()` and -#' `print_html()`). +#' `print_html()`). The `print()` method for text outputs passes arguments in +#' `...` to [`insight::export_table()`]. #' #' @examples #' data(iris) @@ -369,7 +370,8 @@ print.data_codebook <- function(x, ...) { title = caption, empty_line = "-", cross = "+", - align = .get_codebook_align(x) + align = .get_codebook_align(x), + ... ) ) } diff --git a/R/data_group.R b/R/data_group.R index 00a7adf84..538c875c2 100644 --- a/R/data_group.R +++ b/R/data_group.R @@ -51,7 +51,7 @@ data_group <- function(data, to = my_grid[i, , drop = FALSE], match = "and", return_indices = TRUE, - drop_na = FALSE + remove_na = FALSE )) }) my_grid[[".rows"]] <- .rows diff --git a/R/data_match.R b/R/data_match.R index c03b3f222..6b522a0b8 100644 --- a/R/data_match.R +++ b/R/data_match.R @@ -15,7 +15,7 @@ #' @param return_indices Logical, if `FALSE`, return the vector of rows that #' can be used to filter the original data frame. If `FALSE` (default), #' returns directly the filtered data frame instead of the row indices. -#' @param drop_na Logical, if `TRUE`, missing values (`NA`s) are removed before +#' @param remove_na Logical, if `TRUE`, missing values (`NA`s) are removed before #' filtering the data. This is the default behaviour, however, sometimes when #' row indices are requested (i.e. `return_indices=TRUE`), it might be useful #' to preserve `NA` values, so returned row indices match the row indices of @@ -26,6 +26,7 @@ #' character vector (e.g. 
`c("x > 4", "y == 2")`) or a variable that contains #' the string representation of a logical expression. These might be useful #' when used in packages to avoid defining undefined global variables. +#' @param drop_na Deprecated, please use `remove_na` instead. #' #' @return A filtered data frame, or the row indices that match the specified #' configuration. @@ -100,12 +101,24 @@ #' data_filter(mtcars, fl) #' @inherit data_rename seealso #' @export -data_match <- function(x, to, match = "and", return_indices = FALSE, drop_na = TRUE, ...) { +data_match <- function(x, + to, + match = "and", + return_indices = FALSE, + remove_na = TRUE, + drop_na, + ...) { if (!is.data.frame(to)) { to <- as.data.frame(to) } original_x <- x + ## TODO: remove deprecated argument later + if (!missing(drop_na)) { + insight::format_warning("Argument `drop_na` is deprecated. Please use `remove_na` instead.") + remove_na <- drop_na + } + # evaluate match <- match.arg(tolower(match), c("and", "&", "&&", "or", "|", "||", "!", "not")) match <- switch(match, @@ -133,7 +146,7 @@ data_match <- function(x, to, match = "and", return_indices = FALSE, drop_na = T idx <- vector("numeric", length = 0L) } else { # remove missings before matching - if (isTRUE(drop_na)) { + if (isTRUE(remove_na)) { x <- x[stats::complete.cases(x), , drop = FALSE] } idx <- seq_len(nrow(x)) diff --git a/R/data_modify.R b/R/data_modify.R index e7744c1f5..3e30b8f68 100644 --- a/R/data_modify.R +++ b/R/data_modify.R @@ -22,6 +22,9 @@ #' character vector is provided, you may not add further elements to `...`. #' - Using `NULL` as right-hand side removes a variable from the data frame. #' Example: `Petal.Width = NULL`. +#' - For data frames (including grouped ones), the function `n()` can be used to count the +#' number of observations and thereby, for instance, create index values by +#' using `id = 1:n()` or `id = 3:(n()+2)` and similar. 
#' #' Note that newly created variables can be used in subsequent expressions, #' including `.at` or `.if`. See also 'Examples'. @@ -92,7 +95,8 @@ #' grouped_efc, #' c12hour_c = center(c12hour), #' c12hour_z = c12hour_c / sd(c12hour, na.rm = TRUE), -#' c12hour_z2 = standardize(c12hour) +#' c12hour_z2 = standardize(c12hour), +#' id = 1:n() #' ) #' head(new_efc) #' @@ -145,6 +149,11 @@ data_modify.default <- function(data, ...) { data_modify.data.frame <- function(data, ..., .if = NULL, .at = NULL, .modify = NULL) { dots <- eval(substitute(alist(...))) + # error for data frames with no rows... + if (nrow(data) == 0) { + insight::format_error("`data` is an empty data frame. `data_modify()` only works for data frames with at least one row.") # nolint + } + # check if we have dots, or only at/modify ---- if (length(dots)) { @@ -201,6 +210,10 @@ data_modify.grouped_df <- function(data, ..., .if = NULL, .at = NULL, .modify = # the data.frame method later... dots <- match.call(expand.dots = FALSE)[["..."]] + # error for data frames with no rows... + if (nrow(data) == 0) { + insight::format_error("`data` is an empty data frame. 
`data_modify()` only works for data frames with at least one row.") # nolint + } grps <- attr(data, "groups", exact = TRUE) grps <- grps[[".rows"]] @@ -352,8 +365,12 @@ data_modify.grouped_df <- function(data, ..., .if = NULL, .at = NULL, .modify = # finally, we can evaluate expression and get values for new variables symbol_string <- insight::safe_deparse(symbol) if (!is.null(symbol_string) && all(symbol_string == "n()")) { - # "special" functions + # "special" functions - using "n()" just returns number of rows new_variable <- nrow(data) + } else if (!is.null(symbol_string) && length(symbol_string) == 1 && grepl("\\bn\\(\\)", symbol_string)) { + # "special" functions, like "1:n()" or similar - but not "1:fun()" + symbol_string <- str2lang(gsub("n()", "nrow(data)", symbol_string, fixed = TRUE)) + new_variable <- try(with(data, eval(symbol_string)), silent = TRUE) } else { # default evaluation of expression new_variable <- try(with(data, eval(symbol)), silent = TRUE) diff --git a/R/data_partition.R b/R/data_partition.R index 09add9dd7..99f481e18 100644 --- a/R/data_partition.R +++ b/R/data_partition.R @@ -15,7 +15,6 @@ #' @param row_id Character string, indicating the name of the column that #' contains the row-id's. #' @param verbose Toggle messages and warnings. -#' @param group Deprecated. Use `by` instead. #' #' @return A list of data frames. The list includes one training set per given #' proportion and the remaining data as test set. List elements of training @@ -50,17 +49,10 @@ data_partition <- function(data, seed = NULL, row_id = ".row_id", verbose = TRUE, - group = NULL, ...) { # validation checks data <- .coerce_to_dataframe(data) - ## TODO: remove warning in future release - if (!is.null(group)) { - by <- group - insight::format_warning("Argument `group` is deprecated and will be removed in a future release. 
Please use `by` instead.") # nolint - } - if (sum(proportion) > 1) { insight::format_error("Sum of `proportion` cannot be higher than 1.") } diff --git a/R/data_read.R b/R/data_read.R index 5137a7735..b24a5bdc2 100644 --- a/R/data_read.R +++ b/R/data_read.R @@ -15,15 +15,16 @@ #' for SAS data files. #' @param encoding The character encoding used for the file. Usually not needed. #' @param convert_factors If `TRUE` (default), numeric variables, where all -#' values have a value label, are assumed to be categorical and converted -#' into factors. If `FALSE`, no variable types are guessed and no conversion -#' of numeric variables into factors will be performed. See also section -#' 'Differences to other packages'. For `data_write()`, this argument only -#' applies to the text (e.g. `.txt` or `.csv`) or spreadsheet file formats (like -#' `.xlsx`). Converting to factors might be useful for these formats because -#' labelled numeric variables are then converted into factors and exported as -#' character columns - else, value labels would be lost and only numeric values -#' are written to the file. +#' values have a value label, are assumed to be categorical and converted into +#' factors. If `FALSE`, no variable types are guessed and no conversion of +#' numeric variables into factors will be performed. For `data_read()`, this +#' argument only applies to file types with *labelled data*, e.g. files from +#' SPSS, SAS or Stata. See also section 'Differences to other packages'. For +#' `data_write()`, this argument only applies to the text (e.g. `.txt` or +#' `.csv`) or spreadsheet file formats (like `.xlsx`). Converting to factors +#' might be useful for these formats because labelled numeric variables are then +#' converted into factors and exported as character columns - else, value labels +#' would be lost and only numeric values are written to the file. #' @param verbose Toggle warnings and messages. #' @param ... 
Arguments passed to the related `read_*()` or `write_*()` functions. #' @@ -65,12 +66,13 @@ #' @section Differences to other packages that read foreign data formats: #' `data_read()` is most comparable to `rio::import()`. For data files from #' SPSS, SAS or Stata, which support labelled data, variables are converted into -#' their most appropriate type. The major difference to `rio::import()` is that -#' `data_read()` automatically converts fully labelled numeric variables into -#' factors, where imported value labels will be set as factor levels. If a -#' numeric variable has _no_ value labels or less value labels than values, it -#' is not converted to factor. In this case, value labels are preserved as -#' `"labels"` attribute. Character vectors are preserved. Use +#' their most appropriate type. The major difference to `rio::import()` is for +#' data files from SPSS, SAS, or Stata, i.e. file types that support +#' *labelled data*. `data_read()` automatically converts fully labelled numeric +#' variables into factors, where imported value labels will be set as factor +#' levels. If a numeric variable has _no_ value labels or less value labels than +#' values, it is not converted to factor. In this case, value labels are +#' preserved as `"labels"` attribute. Character vectors are preserved. Use #' `convert_factors = FALSE` to remove the automatic conversion of numeric #' variables to factors. #' @@ -105,7 +107,7 @@ data_read <- function(path, por = .read_spss(path, encoding, convert_factors, verbose, ...), dta = .read_stata(path, encoding, convert_factors, verbose, ...), sas7bdat = .read_sas(path, path_catalog, encoding, convert_factors, verbose, ...), - .read_unknown(path, file_type, convert_factors, verbose, ...) + .read_unknown(path, file_type, verbose, ...) 
) # tell user about empty columns @@ -161,7 +163,7 @@ data_read <- function(path, # user may decide whether we automatically detect variable type or not if (isTRUE(convert_factors)) { if (verbose) { - msg <- "Variables where all values have associated labels are now converted into factors. If this is not intended, use `convert_factors = FALSE`." + msg <- "Variables where all values have associated labels are now converted into factors. If this is not intended, use `convert_factors = FALSE`." # nolint insight::format_alert(msg) } x[] <- lapply(x, function(i) { @@ -188,7 +190,7 @@ data_read <- function(path, value_labels <- NULL attr(i, "converted_to_factor") <- TRUE } else { - # else, fall back to numeric + # else, fall back to numeric or factor i <- as.numeric(i) } @@ -288,7 +290,7 @@ data_read <- function(path, } -.read_unknown <- function(path, file_type, convert_factors, verbose, ...) { +.read_unknown <- function(path, file_type, verbose, ...) { insight::check_if_installed("rio", reason = paste0("to read files of type '", file_type, "'")) if (verbose) { insight::format_alert("Reading data...") @@ -296,7 +298,7 @@ data_read <- function(path, # set up arguments. for RDS, we set trust = TRUE, to avoid warnings rio_args <- list(file = path) # check if we have RDS, and if so, add trust = TRUE - if (file_type == "rds") { + if (file_type %in% c("rds", "rdata")) { rio_args$trust <- TRUE } out <- do.call(rio::import, c(rio_args, list(...))) @@ -317,6 +319,5 @@ data_read <- function(path, } out <- tmp } - - .post_process_imported_data(out, convert_factors, verbose) + out } diff --git a/R/data_rename.R b/R/data_rename.R index b8f213c7f..18f45657b 100644 --- a/R/data_rename.R +++ b/R/data_rename.R @@ -13,11 +13,15 @@ #' @param pattern Character vector. For `data_rename()`, indicates columns that #' should be selected for renaming. Can be `NULL` (in which case all columns #' are selected). 
For `data_addprefix()` or `data_addsuffix()`, a character -#' string, which will be added as prefix or suffix to the column names. +#' string, which will be added as prefix or suffix to the column names. For +#' `data_rename()`, `pattern` can also be a named vector. In this case, names +#' are used as values for the `replacement` argument (i.e. `pattern` can be a +#' character vector using `<new name> = "<old name>"` and argument `replacement` +#' will be ignored then). #' @param replacement Character vector. Indicates the new name of the columns #' selected in `pattern`. Can be `NULL` (in which case column are numbered #' in sequential order). If not `NULL`, `pattern` and `replacement` must be -#' of the same length. +#' of the same length. If `pattern` is a named vector, `replacement` is ignored. #' @param rows Vector of row names. #' @param safe Do not throw error if for instance the variable to be #' renamed/removed doesn't exist. @@ -33,12 +37,14 @@ #' head(data_rename(iris, "FakeCol", "length")) # This doesn't #' head(data_rename(iris, c("Sepal.Length", "Sepal.Width"), c("length", "width"))) #' +#' # use named vector to rename +#' head(data_rename(iris, c(length = "Sepal.Length", width = "Sepal.Width"))) +#' #' # Reset names #' head(data_rename(iris, NULL)) #' #' # Change all #' head(data_rename(iris, replacement = paste0("Var", 1:5))) -#' #' @seealso #' - Functions to rename stuff: [data_rename()], [data_rename_rows()], [data_addprefix()], [data_addsuffix()] #' - Functions to reorder or remove columns: [data_reorder()], [data_relocate()], [data_remove()] @@ -66,11 +72,44 @@ data_rename <- function(data, insight::format_error("Argument `pattern` must be of type character.") } + + # check if `pattern` has names, and if so, use as "replacement" + if (!is.null(names(pattern))) { + replacement <- names(pattern) + } + # name columns 1, 2, 3 etc. 
if no replacement if (is.null(replacement)) { replacement <- paste0(seq_along(pattern)) } + # coerce to character + replacement <- as.character(replacement) + + # check if `replacement` has no empty strings and no NA values + invalid_replacement <- is.na(replacement) | !nzchar(replacement) + if (any(invalid_replacement)) { + if (is.null(names(pattern))) { + # when user did not match `pattern` with `replacement` + msg <- c( + "`replacement` is not allowed to have `NA` or empty strings.", + sprintf( + "Following values in `pattern` have no match in `replacement`: %s", + toString(pattern[invalid_replacement]) + ) + ) + } else { + # when user did not name all elements of `pattern` + msg <- c( + "Either name all elements of `pattern` or use `replacement`.", + sprintf( + "Following values in `pattern` were not named: %s", + toString(pattern[invalid_replacement]) + ) + ) + } + insight::format_error(msg) + } + # if duplicated names in replacement, append ".2", ".3", etc. to duplicates # ex: c("foo", "foo") -> c("foo", "foo.2") if (anyDuplicated(replacement) > 0L) { diff --git a/R/data_select.R b/R/data_select.R index 0f62ba398..db91fc06b 100644 --- a/R/data_select.R +++ b/R/data_select.R @@ -38,25 +38,3 @@ data_select <- function(data, out <- .replace_attrs(out, a) out } - - -#' @rdname extract_column_names -#' @export -get_columns <- function(data, - select = NULL, - exclude = NULL, - ignore_case = FALSE, - regex = FALSE, - verbose = TRUE, - ...) { - insight::format_warning("Function `get_columns()` is deprecated and will be removed in a future release. Please use `data_select()` instead.") # nolint - data_select( - data, - select = select, - exclude = exclude, - ignore_case = ignore_case, - regex = regex, - verbose = verbose, - ... - ) -} diff --git a/R/data_summary.R b/R/data_summary.R index 8d15f8483..7662d0c94 100644 --- a/R/data_summary.R +++ b/R/data_summary.R @@ -8,9 +8,9 @@ #' @param by Optional character string, indicating the name of a variable in `x`. 
#' If supplied, the data will be split by this variable and summary statistics #' will be computed for each group. -#' @param include_na Logical. If `TRUE`, missing values are included as a level -#' in the grouping variable. If `FALSE`, missing values are omitted from the -#' grouping variable. +#' @param remove_na Logical. If `TRUE`, missing values are omitted from the +#' grouping variable. If `FALSE` (default), missing values are included as a +#' level in the grouping variable. #' @param ... One or more named expressions that define the new variable name #' and the function to compute the summary statistic. Example: #' `mean_sepal_width = mean(Sepal.Width)`. The expression can also be provided @@ -57,8 +57,8 @@ data_summary <- function(x, ...) { #' @export -data_summary.matrix <- function(x, ..., by = NULL, include_na = TRUE) { - data_summary(as.data.frame(x), ..., by = by, include_na = include_na) +data_summary.matrix <- function(x, ..., by = NULL, remove_na = FALSE) { + data_summary(as.data.frame(x), ..., by = by, remove_na = remove_na) } @@ -70,7 +70,7 @@ data_summary.default <- function(x, ...) { #' @rdname data_summary #' @export -data_summary.data.frame <- function(x, ..., by = NULL, include_na = TRUE) { +data_summary.data.frame <- function(x, ..., by = NULL, remove_na = FALSE) { dots <- eval(substitute(alist(...))) # do we have any expression at all? 
@@ -103,10 +103,10 @@ data_summary.data.frame <- function(x, ..., by = NULL, include_na = TRUE) { } # split data, add NA levels, if requested l <- lapply(x[by], function(i) { - if (include_na && anyNA(i)) { - addNA(i) - } else { + if (remove_na || !anyNA(i)) { i + } else { + addNA(i) } }) split_data <- split(x, l, drop = TRUE) @@ -137,7 +137,7 @@ data_summary.data.frame <- function(x, ..., by = NULL, include_na = TRUE) { #' @export -data_summary.grouped_df <- function(x, ..., by = NULL, include_na = TRUE) { +data_summary.grouped_df <- function(x, ..., by = NULL, remove_na = FALSE) { # extract group variables grps <- attr(x, "groups", exact = TRUE) group_variables <- data_remove(grps, ".rows") @@ -148,7 +148,7 @@ data_summary.grouped_df <- function(x, ..., by = NULL, include_na = TRUE) { # remove information specific to grouped df's attr(x, "groups") <- NULL class(x) <- "data.frame" - data_summary(x, ..., by = by, include_na = include_na) + data_summary(x, ..., by = by, remove_na = remove_na) } diff --git a/R/data_tabulate.R b/R/data_tabulate.R index 6a26a39c9..621fabbb1 100644 --- a/R/data_tabulate.R +++ b/R/data_tabulate.R @@ -15,7 +15,7 @@ #' factor levels are dropped from the frequency table. #' @param name Optional character string, which includes the name that is used #' for printing. -#' @param include_na Logical, if `TRUE`, missing values are included in the +#' @param remove_na Logical, if `FALSE`, missing values are included in the #' frequency or crosstable, else missing values are omitted. #' @param collapse Logical, if `TRUE` collapses multiple tables into one larger #' table for printing. This affects only printing, not the returned object. @@ -28,12 +28,19 @@ #' @param ... not used. #' @inheritParams extract_column_names #' +#' @details +#' There is an `as.data.frame()` method, to return the frequency tables as a +#' data frame. 
The structure of the returned object is a nested data frame, +#' where the first column contains name of the variable for which frequencies +#' were calculated, and the second column is a list column that contains the +#' frequency tables as data frame. See 'Examples'. +#' #' @section Crosstables: #' If `by` is supplied, a crosstable is created. The crosstable includes `<NA>` #' (missing) values by default. The first column indicates values of `x`, the #' first row indicates values of `by` (including missing values). The last row #' and column contain the total frequencies for each row and column, respectively. -#' Setting `include_na = FALSE` will omit missing values from the crosstable. +#' Setting `remove_na = TRUE` will omit missing values from the crosstable. #' Setting `proportions` to `"row"` or `"column"` will add row or column #' percentages. Setting `proportions` to `"full"` will add relative frequencies #' for the full table. @@ -41,7 +48,8 @@ #' @note #' There are `print_html()` and `print_md()` methods available for printing #' frequency or crosstables in HTML and markdown format, e.g. -#' `print_html(data_tabulate(x))`. +#' `print_html(data_tabulate(x))`. The `print()` method for text outputs passes +#' arguments in `...` to [`insight::export_table()`]. #' #' @return A data frame, or a list of data frames, with one frequency table #' as data frame per variable. 
@@ -55,7 +63,7 @@ #' data_tabulate(efc$c172code) #' #' # drop missing values -#' data_tabulate(efc$c172code, include_na = FALSE) +#' data_tabulate(efc$c172code, remove_na = TRUE) #' #' # data frame #' data_tabulate(efc, c("e42dep", "c172code")) @@ -102,12 +110,18 @@ #' efc$c172code, #' by = efc$e16sex, #' proportions = "column", -#' include_na = FALSE +#' remove_na = TRUE #' ) #' #' # round percentages #' out <- data_tabulate(efc, "c172code", by = "e16sex", proportions = "column") #' print(out, digits = 0) +#' +#' # coerce to data frames +#' result <- data_tabulate(efc, "c172code", by = "e16sex") +#' as.data.frame(result) +#' as.data.frame(result)$table +#' as.data.frame(result, add_total = TRUE)$table #' @export data_tabulate <- function(x, ...) { UseMethod("data_tabulate") @@ -120,7 +134,7 @@ data_tabulate.default <- function(x, by = NULL, drop_levels = FALSE, weights = NULL, - include_na = TRUE, + remove_na = FALSE, proportions = NULL, name = NULL, verbose = TRUE, @@ -150,7 +164,7 @@ data_tabulate.default <- function(x, x, by = by, weights = weights, - include_na = include_na, + remove_na = remove_na, proportions = proportions, obj_name = obj_name, group_variable = group_variable @@ -159,30 +173,34 @@ data_tabulate.default <- function(x, # frequency table if (is.null(weights)) { - if (include_na) { - freq_table <- tryCatch(table(addNA(x)), error = function(e) NULL) - } else { + if (remove_na) { + # we have a `.default` and a `.data.frame` method for `data_tabulate()`. + # since this is the default, `x` can be an object which cannot be used + # with `table()`, that's why we add `tryCatch()` here. Below we give an + # informative error message for non-supported objects. 
freq_table <- tryCatch(table(x), error = function(e) NULL) + } else { + freq_table <- tryCatch(table(addNA(x)), error = function(e) NULL) } - } else if (include_na) { - # weighted frequency table, including NA + } else if (remove_na) { + # weighted frequency table, excluding NA freq_table <- tryCatch( stats::xtabs( weights ~ x, - data = data.frame(weights = weights, x = addNA(x)), - na.action = stats::na.pass, - addNA = TRUE + data = data.frame(weights = weights, x = x), + na.action = stats::na.omit, + addNA = FALSE ), error = function(e) NULL ) } else { - # weighted frequency table, excluding NA + # weighted frequency table, including NA freq_table <- tryCatch( stats::xtabs( weights ~ x, - data = data.frame(weights = weights, x = x), - na.action = stats::na.omit, - addNA = FALSE + data = data.frame(weights = weights, x = addNA(x)), + na.action = stats::na.pass, + addNA = TRUE ), error = function(e) NULL ) @@ -205,12 +223,12 @@ data_tabulate.default <- function(x, out$`Raw %` <- 100 * out$N / sum(out$N) # if we have missing values, we add a row with NA - if (include_na) { - out$`Valid %` <- c(100 * out$N[-nrow(out)] / sum(out$N[-nrow(out)]), NA) - valid_n <- sum(out$N[-length(out$N)], na.rm = TRUE) - } else { + if (remove_na) { out$`Valid %` <- 100 * out$N / sum(out$N) valid_n <- sum(out$N, na.rm = TRUE) + } else { + out$`Valid %` <- c(100 * out$N[-nrow(out)] / sum(out$N[-nrow(out)]), NA) + valid_n <- sum(out$N[-length(out$N)], na.rm = TRUE) } out$`Cumulative %` <- cumsum(out$`Valid %`) @@ -242,7 +260,7 @@ data_tabulate.default <- function(x, attr(out, "total_n") <- sum(out$N, na.rm = TRUE) attr(out, "valid_n") <- valid_n - class(out) <- c("dw_data_tabulate", "data.frame") + class(out) <- c("datawizard_table", "data.frame") out } @@ -258,7 +276,7 @@ data_tabulate.data.frame <- function(x, by = NULL, drop_levels = FALSE, weights = NULL, - include_na = TRUE, + remove_na = FALSE, proportions = NULL, collapse = FALSE, verbose = TRUE, @@ -284,7 +302,7 @@ 
data_tabulate.data.frame <- function(x, proportions = proportions, drop_levels = drop_levels, weights = weights, - include_na = include_na, + remove_na = remove_na, name = i, verbose = verbose, ... @@ -292,9 +310,9 @@ data_tabulate.data.frame <- function(x, }) if (is.null(by)) { - class(out) <- c("dw_data_tabulates", "list") + class(out) <- c("datawizard_tables", "list") } else { - class(out) <- c("dw_data_xtabulates", "list") + class(out) <- c("datawizard_crosstabs", "list") } attr(out, "collapse") <- isTRUE(collapse) attr(out, "is_weighted") <- !is.null(weights) @@ -313,7 +331,7 @@ data_tabulate.grouped_df <- function(x, proportions = NULL, drop_levels = FALSE, weights = NULL, - include_na = TRUE, + remove_na = FALSE, collapse = FALSE, verbose = TRUE, ...) { @@ -349,7 +367,7 @@ data_tabulate.grouped_df <- function(x, verbose = verbose, drop_levels = drop_levels, weights = weights, - include_na = include_na, + remove_na = remove_na, by = by, proportions = proportions, group_variable = group_variable, @@ -357,9 +375,9 @@ data_tabulate.grouped_df <- function(x, )) } if (is.null(by)) { - class(out) <- c("dw_data_tabulates", "list") + class(out) <- c("datawizard_tables", "list") } else { - class(out) <- c("dw_data_xtabulates", "list") + class(out) <- c("datawizard_crosstabs", "list") } attr(out, "collapse") <- isTRUE(collapse) attr(out, "is_weighted") <- !is.null(weights) @@ -380,8 +398,64 @@ insight::print_html insight::print_md +#' @rdname data_tabulate +#' @param add_total For crosstables (i.e. when `by` is not `NULL`), a row and +#' column with the total N values are added to the data frame. `add_total` has +#' no effect in `as.data.frame()` for simple frequency tables. 
+#' @inheritParams base::as.data.frame +#' @export +as.data.frame.datawizard_tables <- function(x, + row.names = NULL, + optional = FALSE, + ..., + stringsAsFactors = FALSE, + add_total = FALSE) { + # extract variables of frequencies + selected_vars <- unlist(lapply(x, function(i) attributes(i)$varname)) + # coerce to data frame, remove rownames + data_frames <- lapply(x, function(i) { + # the `format()` methods for objects returned by `data_tabulate()` call + # `as.data.frame()` - we have to pay attention to avoid infinite iterations + # here. At the moment, this is no problem, as objects we have at this stage + # are of class "datawizard_table" or "datawizard_crosstab", while this + # `as.data.frame()` method is only called for "datawizard_tables" (the plural) + # form). Else, we would need to modify the class attribute here, + # e.g. class(i) <- "data.frame" + if (add_total) { + # to add the total column and row, we simply can call `format()` + out <- as.data.frame(format(i)) + for (cols in 2:ncol(out)) { + # since "format()" returns a character matrix, we want to convert + # the columns to numeric. We have to exclude the first column, as the + # first column is character, due to the added "Total" value. + out[[cols]] <- as.numeric(out[[cols]]) + } + # after formatting, we have a "separator" row for nicer printing. + # this should also be removed + out <- remove_empty_rows(out) + } else { + out <- as.data.frame(i) + } + rownames(out) <- NULL + out + }) + # create nested data frame + result <- data.frame( + var = selected_vars, + table = I(data_frames), + stringsAsFactors = stringsAsFactors + ) + # consider additional arguments + rownames(result) <- row.names + result +} + +#' @export +as.data.frame.datawizard_crosstabs <- as.data.frame.datawizard_tables + + #' @export -format.dw_data_tabulate <- function(x, format = "text", big_mark = NULL, ...) { +format.datawizard_table <- function(x, format = "text", big_mark = NULL, ...) 
{ # convert to character manually, else, for large numbers, # format_table() returns scientific notation x <- as.data.frame(x) @@ -414,7 +488,7 @@ format.dw_data_tabulate <- function(x, format = "text", big_mark = NULL, ...) { #' @export -print.dw_data_tabulate <- function(x, big_mark = NULL, ...) { +print.datawizard_table <- function(x, big_mark = NULL, ...) { a <- attributes(x) # "table" header with variable label/name, and type @@ -449,14 +523,15 @@ print.dw_data_tabulate <- function(x, big_mark = NULL, ...) { cat(insight::export_table( format(x, big_mark = big_mark, ...), cross = "+", - missing = "" + missing = "", + ... )) invisible(x) } #' @export -print_html.dw_data_tabulate <- function(x, big_mark = NULL, ...) { +print_html.datawizard_table <- function(x, big_mark = NULL, ...) { a <- attributes(x) # "table" header with variable label/name, and type @@ -486,7 +561,7 @@ print_html.dw_data_tabulate <- function(x, big_mark = NULL, ...) { #' @export -print_md.dw_data_tabulate <- function(x, big_mark = NULL, ...) { +print_md.datawizard_table <- function(x, big_mark = NULL, ...) { a <- attributes(x) # "table" header with variable label/name, and type @@ -516,7 +591,7 @@ print_md.dw_data_tabulate <- function(x, big_mark = NULL, ...) { #' @export -print.dw_data_tabulates <- function(x, big_mark = NULL, ...) { +print.datawizard_tables <- function(x, big_mark = NULL, ...) { # check if we have weights is_weighted <- isTRUE(attributes(x)$is_weighted) @@ -548,14 +623,15 @@ print.dw_data_tabulates <- function(x, big_mark = NULL, ...) { out, missing = "", cross = "+", - empty_line = "-" + empty_line = "-", + ... )) } } #' @export -print_html.dw_data_tabulates <- function(x, big_mark = NULL, ...) { +print_html.datawizard_tables <- function(x, big_mark = NULL, ...) { # check if we have weights is_weighted <- isTRUE(attributes(x)$is_weighted) @@ -584,7 +660,7 @@ print_html.dw_data_tabulates <- function(x, big_mark = NULL, ...) 
{ #' @export -print_md.dw_data_tabulates <- function(x, big_mark = NULL, ...) { +print_md.datawizard_tables <- function(x, big_mark = NULL, ...) { # check if we have weights is_weighted <- isTRUE(attributes(x)$is_weighted) diff --git a/R/data_xtabulate.R b/R/data_xtabulate.R index 3cb25d62b..c9595eccf 100644 --- a/R/data_xtabulate.R +++ b/R/data_xtabulate.R @@ -3,7 +3,7 @@ .crosstable <- function(x, by, weights = NULL, - include_na = TRUE, + remove_na = FALSE, proportions = NULL, obj_name = NULL, group_variable = NULL) { @@ -12,30 +12,34 @@ } # frequency table if (is.null(weights)) { - if (include_na) { - x_table <- tryCatch(table(addNA(x), addNA(by)), error = function(e) NULL) - } else { + # we have a `.default` and a `.data.frame` method for `data_tabulate()`. + # since this is the default, `x` can be an object which cannot be used + # with `table()`, that's why we add `tryCatch()` here. Below we give an + # informative error message for non-supported objects. + if (remove_na) { x_table <- tryCatch(table(x, by), error = function(e) NULL) + } else { + x_table <- tryCatch(table(addNA(x), addNA(by)), error = function(e) NULL) } - } else if (include_na) { - # weighted frequency table, including NA + } else if (remove_na) { + # weighted frequency table, excluding NA x_table <- tryCatch( stats::xtabs( weights ~ x + by, - data = data.frame(weights = weights, x = addNA(x), by = addNA(by)), - na.action = stats::na.pass, - addNA = TRUE + data = data.frame(weights = weights, x = x, by = by), + na.action = stats::na.omit, + addNA = FALSE ), error = function(e) NULL ) } else { - # weighted frequency table, excluding NA + # weighted frequency table, including NA x_table <- tryCatch( stats::xtabs( weights ~ x + by, - data = data.frame(weights = weights, x = x, by = by), - na.action = stats::na.omit, - addNA = FALSE + data = data.frame(weights = weights, x = addNA(x), by = addNA(by)), + na.action = stats::na.pass, + addNA = TRUE ), error = function(e) NULL ) @@ -74,8 +78,9 @@ 
attr(out, "total_n") <- total_n attr(out, "weights") <- weights attr(out, "proportions") <- proportions + attr(out, "varname") <- obj_name - class(out) <- c("dw_data_xtabulate", "data.frame") + class(out) <- c("datawizard_crosstab", "data.frame") out } @@ -85,7 +90,7 @@ #' @export -format.dw_data_xtabulate <- function(x, format = "text", digits = 1, big_mark = NULL, ...) { +format.datawizard_crosstab <- function(x, format = "text", digits = 1, big_mark = NULL, ...) { # convert to character manually, else, for large numbers, # format_table() returns scientific notation x <- as.data.frame(x) @@ -178,7 +183,7 @@ format.dw_data_xtabulate <- function(x, format = "text", digits = 1, big_mark = #' @export -print.dw_data_xtabulate <- function(x, big_mark = NULL, ...) { +print.datawizard_crosstab <- function(x, big_mark = NULL, ...) { # grouped data? if yes, add information on grouping factor if (is.null(x[["Group"]])) { caption <- NULL @@ -193,14 +198,15 @@ print.dw_data_xtabulate <- function(x, big_mark = NULL, ...) { cross = "+", missing = "", caption = caption, - empty_line = "-" + empty_line = "-", + ... )) invisible(x) } #' @export -print_md.dw_data_xtabulate <- function(x, big_mark = NULL, ...) { +print_md.datawizard_crosstab <- function(x, big_mark = NULL, ...) { # grouped data? if yes, add information on grouping factor if (is.null(x[["Group"]])) { caption <- NULL @@ -222,7 +228,7 @@ print_md.dw_data_xtabulate <- function(x, big_mark = NULL, ...) { #' @export -print_html.dw_data_xtabulate <- function(x, big_mark = NULL, ...) { +print_html.datawizard_crosstab <- function(x, big_mark = NULL, ...) { # grouped data? if yes, add information on grouping factor if (!is.null(x[["Group"]])) { x$groups <- paste0("Grouped by ", x[["Group"]][1]) @@ -240,7 +246,7 @@ print_html.dw_data_xtabulate <- function(x, big_mark = NULL, ...) { #' @export -print.dw_data_xtabulates <- function(x, big_mark = NULL, ...) { +print.datawizard_crosstabs <- function(x, big_mark = NULL, ...) 
{ for (i in seq_along(x)) { print(x[[i]], big_mark = big_mark, ...) cat("\n") @@ -250,7 +256,7 @@ print.dw_data_xtabulates <- function(x, big_mark = NULL, ...) { #' @export -print_html.dw_data_xtabulates <- function(x, big_mark = NULL, ...) { +print_html.datawizard_crosstabs <- function(x, big_mark = NULL, ...) { if (length(x) == 1) { print_html(x[[1]], big_mark = big_mark, ...) } else { diff --git a/R/demean.R b/R/demean.R index bbf7d2dfc..b5363edb6 100644 --- a/R/demean.R +++ b/R/demean.R @@ -12,7 +12,25 @@ #' @param select Character vector (or formula) with names of variables to select #' that should be group- and de-meaned. #' @param by Character vector (or formula) with the name of the variable that -#' indicates the group- or cluster-ID. +#' indicates the group- or cluster-ID. For cross-classified or nested designs, +#' `by` can also identify two or more variables as group- or cluster-IDs. If +#' the data is nested and should be treated as such, set `nested = TRUE`. Else, +#' if `by` defines two or more variables and `nested = FALSE`, a cross-classified +#' design is assumed. Note that `demean()` and `degroup()` can't handle a mix +#' of nested and cross-classified designs in one model. +#' +#' For nested designs, `by` can be: +#' - a character vector with the name of the variable that indicates the +#' levels, ordered from *highest* level to *lowest* (e.g. +#' `by = c("L4", "L3", "L2")`. +#' - a character vector with variable names in the format `by = "L4/L3/L2"`, +#' where the levels are separated by `/`. +#' +#' See also section _De-meaning for cross-classified designs_ and +#' _De-meaning for nested designs_ below. +#' @param nested Logical, if `TRUE`, the data is treated as nested. If `FALSE`, +#' the data is treated as cross-classified. Only applies if `by` contains more +#' than one variable. #' @param center Method for centering. 
`demean()` always performs #' mean-centering, while `degroup()` can use `center = "median"` or #' `center = "mode"` for median- or mode-centering, and also `"min"` @@ -25,174 +43,213 @@ #' attributes to indicate the within- and between-effects. This is only #' relevant when printing `model_parameters()` - in such cases, the #' within- and between-effects are printed in separated blocks. -#' @param group Deprecated. Use `by` instead. #' @inheritParams center #' #' @return #' A data frame with the group-/de-meaned variables, which get the suffix #' `"_between"` (for the group-meaned variable) and `"_within"` (for the -#' de-meaned variable) by default. +#' de-meaned variable) by default. For cross-classified or nested designs, +#' the name pattern of the group-meaned variables is the name of the centered +#' variable followed by the name of the variable that indicates the related +#' grouping level, e.g. `predictor_L3_between` and `predictor_L2_between`. #' #' @seealso If grand-mean centering (instead of centering within-clusters) -#' is required, see [center()]. See [`performance::check_heterogeneity_bias()`] +#' is required, see [`center()`]. See [`performance::check_heterogeneity_bias()`] #' to check for heterogeneity bias. #' -#' @details -#' -#' \subsection{Heterogeneity Bias}{ -#' Mixed models include different levels of sources of variability, i.e. -#' error terms at each level. When macro-indicators (or level-2 predictors, -#' or higher-level units, or more general: *group-level predictors that -#' **vary** within and across groups*) are included as fixed effects (i.e. 
-#' treated as covariate at level-1), the variance that is left unaccounted for -#' this covariate will be absorbed into the error terms of level-1 and level-2 -#' (\cite{Bafumi and Gelman 2006; Gelman and Hill 2007, Chapter 12.6.}): -#' \dQuote{Such covariates contain two parts: one that is specific to the -#' higher-level entity that does not vary between occasions, and one that -#' represents the difference between occasions, within higher-level entities} -#' (\cite{Bell et al. 2015}). Hence, the error terms will be correlated with -#' the covariate, which violates one of the assumptions of mixed models -#' (iid, independent and identically distributed error terms). This bias is -#' also called the *heterogeneity bias* (\cite{Bell et al. 2015}). To -#' resolve this problem, level-2 predictors used as (level-1) covariates should -#' be separated into their "within" and "between" effects by "de-meaning" and -#' "group-meaning": After demeaning time-varying predictors, \dQuote{at the -#' higher level, the mean term is no longer constrained by Level 1 effects, -#' so it is free to account for all the higher-level variance associated -#' with that variable} (\cite{Bell et al. 2015}). -#' } -#' -#' \subsection{Panel data and correlating fixed and group effects}{ -#' `demean()` is intended to create group- and de-meaned variables -#' for panel regression models (fixed effects models), or for complex -#' random-effect-within-between models (see \cite{Bell et al. 2015, 2018}), -#' where group-effects (random effects) and fixed effects correlate (see -#' \cite{Bafumi and Gelman 2006}). This can happen, for instance, when -#' analyzing panel data, which can lead to *Heterogeneity Bias*. To -#' control for correlating predictors and group effects, it is recommended -#' to include the group-meaned and de-meaned version of *time-varying covariates* -#' (and group-meaned version of *time-invariant covariates* that are on -#' a higher level, e.g. 
level-2 predictors) in the model. By this, one can -#' fit complex multilevel models for panel data, including time-varying -#' predictors, time-invariant predictors and random effects. -#' } -#' -#' \subsection{Why mixed models are preferred over fixed effects models}{ -#' A mixed models approach can model the causes of endogeneity explicitly -#' by including the (separated) within- and between-effects of time-varying -#' fixed effects and including time-constant fixed effects. Furthermore, -#' mixed models also include random effects, thus a mixed models approach -#' is superior to classic fixed-effects models, which lack information of -#' variation in the group-effects or between-subject effects. Furthermore, -#' fixed effects regression cannot include random slopes, which means that -#' fixed effects regressions are neglecting \dQuote{cross-cluster differences -#' in the effects of lower-level controls (which) reduces the precision of -#' estimated context effects, resulting in unnecessarily wide confidence -#' intervals and low statistical power} (\cite{Heisig et al. 2017}). -#' } -#' -#' \subsection{Terminology}{ -#' The group-meaned variable is simply the mean of an independent variable -#' within each group (or id-level or cluster) represented by `by`. -#' It represents the cluster-mean of an independent variable. The regression -#' coefficient of a group-meaned variable is the *between-subject-effect*. -#' The de-meaned variable is then the centered version of the group-meaned -#' variable. De-meaning is sometimes also called person-mean centering or -#' centering within clusters. The regression coefficient of a de-meaned -#' variable represents the *within-subject-effect*. -#' } -#' -#' \subsection{De-meaning with continuous predictors}{ -#' For continuous time-varying predictors, the recommendation is to include -#' both their de-meaned and group-meaned versions as fixed effects, but not -#' the raw (untransformed) time-varying predictors themselves. 
The de-meaned -#' predictor should also be included as random effect (random slope). In -#' regression models, the coefficient of the de-meaned predictors indicates -#' the within-subject effect, while the coefficient of the group-meaned -#' predictor indicates the between-subject effect. -#' } -#' -#' \subsection{De-meaning with binary predictors}{ -#' For binary time-varying predictors, there are two recommendations. First -#' is to include the raw (untransformed) binary predictor as fixed effect -#' only and the *de-meaned* variable as random effect (random slope). -#' The alternative would be to add the de-meaned version(s) of binary -#' time-varying covariates as additional fixed effect as well (instead of -#' adding it as random slope). Centering time-varying binary variables to -#' obtain within-effects (level 1) isn't necessary. They have a sensible -#' interpretation when left in the typical 0/1 format (\cite{Hoffmann 2015, -#' chapter 8-2.I}). `demean()` will thus coerce categorical time-varying -#' predictors to numeric to compute the de- and group-meaned versions for -#' these variables, where the raw (untransformed) binary predictor and the -#' de-meaned version should be added to the model. -#' } -#' -#' \subsection{De-meaning of factors with more than 2 levels}{ -#' Factors with more than two levels are demeaned in two ways: first, these -#' are also converted to numeric and de-meaned; second, dummy variables -#' are created (binary, with 0/1 coding for each level) and these binary -#' dummy-variables are de-meaned in the same way (as described above). -#' Packages like \pkg{panelr} internally convert factors to dummies before -#' demeaning, so this behaviour can be mimicked here. -#' } -#' -#' \subsection{De-meaning interaction terms}{ There are multiple ways to deal -#' with interaction terms of within- and between-effects. A classical approach -#' is to simply use the product term of the de-meaned variables (i.e. 
-#' introducing the de-meaned variables as interaction term in the model -#' formula, e.g. `y ~ x_within * time_within`). This approach, however, -#' might be subject to bias (see \cite{Giesselmann & Schmidt-Catran 2020}). -#' \cr \cr -#' Another option is to first calculate the product term and then apply the -#' de-meaning to it. This approach produces an estimator \dQuote{that reflects -#' unit-level differences of interacted variables whose moderators vary -#' within units}, which is desirable if *no* within interaction of -#' two time-dependent variables is required. \cr \cr -#' A third option, when the interaction should result in a genuine within -#' estimator, is to "double de-mean" the interaction terms -#' (\cite{Giesselmann & Schmidt-Catran 2018}), however, this is currently -#' not supported by `demean()`. If this is required, the `wmb()` -#' function from the \pkg{panelr} package should be used. \cr \cr -#' To de-mean interaction terms for within-between models, simply specify -#' the term as interaction for the `select`-argument, e.g. -#' `select = "a*b"` (see 'Examples'). -#' } -#' -#' \subsection{Analysing panel data with mixed models using lme4}{ -#' A description of how to translate the -#' formulas described in *Bell et al. 2018* into R using `lmer()` -#' from \pkg{lme4} can be found in -#' [this vignette](https://easystats.github.io/parameters/articles/demean.html). -#' } +#' @section Heterogeneity Bias: +#' +#' Mixed models include different levels of sources of variability, i.e. +#' error terms at each level. When macro-indicators (or level-2 predictors, +#' or higher-level units, or more general: *group-level predictors that +#' **vary** within and across groups*) are included as fixed effects (i.e. 
+#' treated as covariate at level-1), the variance that is left unaccounted for +#' this covariate will be absorbed into the error terms of level-1 and level-2 +#' (_Bafumi and Gelman 2006; Gelman and Hill 2007, Chapter 12.6._): +#' "Such covariates contain two parts: one that is specific to the higher-level +#' entity that does not vary between occasions, and one that represents the +#' difference between occasions, within higher-level entities" (_Bell et al. 2015_). +#' Hence, the error terms will be correlated with the covariate, which violates +#' one of the assumptions of mixed models (iid, independent and identically +#' distributed error terms). This bias is also called the *heterogeneity bias* +#' (_Bell et al. 2015_). To resolve this problem, level-2 predictors used as +#' (level-1) covariates should be separated into their "within" and "between" +#' effects by "de-meaning" and "group-meaning": After demeaning time-varying +#' predictors, "at the higher level, the mean term is no longer constrained by +#' Level 1 effects, so it is free to account for all the higher-level variance +#' associated with that variable" (_Bell et al. 2015_). +#' +#' @section Panel data and correlating fixed and group effects: +#' +#' `demean()` is intended to create group- and de-meaned variables for panel +#' regression models (fixed effects models), or for complex +#' random-effect-within-between models (see _Bell et al. 2015, 2018_), where +#' group-effects (random effects) and fixed effects correlate (see +#' _Bafumi and Gelman 2006_). This can happen, for instance, when analyzing +#' panel data, which can lead to *Heterogeneity Bias*. To control for correlating +#' predictors and group effects, it is recommended to include the group-meaned +#' and de-meaned version of *time-varying covariates* (and group-meaned version +#' of *time-invariant covariates* that are on a higher level, e.g. level-2 +#' predictors) in the model. 
By this, one can fit complex multilevel models for +#' panel data, including time-varying predictors, time-invariant predictors and +#' random effects. +#' +#' @section Why mixed models are preferred over fixed effects models: +#' +#' A mixed models approach can model the causes of endogeneity explicitly +#' by including the (separated) within- and between-effects of time-varying +#' fixed effects and including time-constant fixed effects. Furthermore, +#' mixed models also include random effects, thus a mixed models approach +#' is superior to classic fixed-effects models, which lack information of +#' variation in the group-effects or between-subject effects. Furthermore, +#' fixed effects regression cannot include random slopes, which means that +#' fixed effects regressions are neglecting "cross-cluster differences in the +#' effects of lower-level controls (which) reduces the precision of estimated +#' context effects, resulting in unnecessarily wide confidence intervals and +#' low statistical power" (_Heisig et al. 2017_). +#' +#' @section Terminology: +#' +#' The group-meaned variable is simply the mean of an independent variable +#' within each group (or id-level or cluster) represented by `by`. It represents +#' the cluster-mean of an independent variable. The regression coefficient of a +#' group-meaned variable is the *between-subject-effect*. The de-meaned variable +#' is then the centered version of the group-meaned variable. De-meaning is +#' sometimes also called person-mean centering or centering within clusters. +#' The regression coefficient of a de-meaned variable represents the +#' *within-subject-effect*. +#' +#' @section De-meaning with continuous predictors: +#' +#' For continuous time-varying predictors, the recommendation is to include +#' both their de-meaned and group-meaned versions as fixed effects, but not +#' the raw (untransformed) time-varying predictors themselves. 
The de-meaned +#' predictor should also be included as random effect (random slope). In +#' regression models, the coefficient of the de-meaned predictors indicates +#' the within-subject effect, while the coefficient of the group-meaned +#' predictor indicates the between-subject effect. +#' +#' @section De-meaning with binary predictors: +#' +#' For binary time-varying predictors, there are two recommendations. First +#' is to include the raw (untransformed) binary predictor as fixed effect +#' only and the *de-meaned* variable as random effect (random slope). +#' The alternative would be to add the de-meaned version(s) of binary +#' time-varying covariates as additional fixed effect as well (instead of +#' adding it as random slope). Centering time-varying binary variables to +#' obtain within-effects (level 1) isn't necessary. They have a sensible +#' interpretation when left in the typical 0/1 format (_Hoffmann 2015, +#' chapter 8-2.I_). `demean()` will thus coerce categorical time-varying +#' predictors to numeric to compute the de- and group-meaned versions for +#' these variables, where the raw (untransformed) binary predictor and the +#' de-meaned version should be added to the model. +#' +#' @section De-meaning of factors with more than 2 levels: +#' +#' Factors with more than two levels are demeaned in two ways: first, these +#' are also converted to numeric and de-meaned; second, dummy variables +#' are created (binary, with 0/1 coding for each level) and these binary +#' dummy-variables are de-meaned in the same way (as described above). +#' Packages like **panelr** internally convert factors to dummies before +#' demeaning, so this behaviour can be mimicked here. +#' +#' @section De-meaning interaction terms: +#' +#' There are multiple ways to deal with interaction terms of within- and +#' between-effects. +#' +#' - A classical approach is to simply use the product term of the de-meaned +#' variables (i.e. 
introducing the de-meaned variables as interaction term +#' in the model formula, e.g. `y ~ x_within * time_within`). This approach, +#' however, might be subject to bias (see _Giesselmann & Schmidt-Catran 2020_). +#' +#' - Another option is to first calculate the product term and then apply the +#' de-meaning to it. This approach produces an estimator "that reflects +#' unit-level differences of interacted variables whose moderators vary +#' within units", which is desirable if *no* within interaction of +#' two time-dependent variables is required. This is what `demean()` does +#' internally when `select` contains interaction terms. +#' +#' - A third option, when the interaction should result in a genuine within +#' estimator, is to "double de-mean" the interaction terms +#' (_Giesselmann & Schmidt-Catran 2018_), however, this is currently +#' not supported by `demean()`. If this is required, the `wbm()` +#' function from the **panelr** package should be used. +#' +#' To de-mean interaction terms for within-between models, simply specify +#' the term as interaction for the `select`-argument, e.g. `select = "a*b"` +#' (see 'Examples'). +#' +#' @section De-meaning for cross-classified designs: +#' +#' `demean()` can handle cross-classified designs, where the data has two or +#' more groups at the higher (i.e. second) level. In such cases, the +#' `by`-argument can identify two or more variables that represent the +#' cross-classified group- or cluster-IDs. The de-meaned variables for +#' cross-classified designs are simply subtracting all group means from each +#' individual value, i.e. _fully cluster-mean-centering_ (see _Guo et al. 2024_ +#' for details). Note that de-meaning for cross-classified designs is *not* +#' equivalent to de-meaning of nested data structures from models with three or +#' more levels. Set `nested = TRUE` to explicitly assume a nested design. 
For +#' cross-classified designs, de-meaning is supposed to work for models like +#' `y ~ x + (1|level3) + (1|level2)`, but *not* for models like +#' `y ~ x + (1|level3/level2)`. Note that `demean()` and `degroup()` can't +#' handle a mix of nested and cross-classified designs in one model. +#' +#' @section De-meaning for nested designs: +#' +#' _Brincks et al. (2017)_ have suggested an algorithm to center variables for +#' nested designs, which is implemented in `demean()`. For nested designs, set +#' `nested = TRUE` *and* specify the variables that indicate the different +#' levels in descending order in the `by` argument. E.g., +#' `by = c("level4", "level3", "level2")` assumes a model like +#' `y ~ x + (1|level4/level3/level2)`. An alternative notation for the +#' `by`-argument would be `by = "level4/level3/level2"`, similar to the +#' formula notation. +#' +#' @section Analysing panel data with mixed models using lme4: +#' +#' A description of how to translate the formulas described in *Bell et al. 2018* +#' into R using `lmer()` from **lme4** can be found in +#' [this vignette](https://easystats.github.io/parameters/articles/demean.html). #' #' @references #' #' - Bafumi J, Gelman A. 2006. Fitting Multilevel Models When Predictors -#' and Group Effects Correlate. In. Philadelphia, PA: Annual meeting of the -#' American Political Science Association. +#' and Group Effects Correlate. In. Philadelphia, PA: Annual meeting of the +#' American Political Science Association. #' #' - Bell A, Fairbrother M, Jones K. 2019. Fixed and Random Effects -#' Models: Making an Informed Choice. Quality & Quantity (53); 1051-1074 +#' Models: Making an Informed Choice. Quality & Quantity (53); 1051-1074 #' #' - Bell A, Jones K. 2015. Explaining Fixed Effects: Random Effects -#' Modeling of Time-Series Cross-Sectional and Panel Data. Political Science -#' Research and Methods, 3(1), 133–153. +#' Modeling of Time-Series Cross-Sectional and Panel Data. 
Political Science +#' Research and Methods, 3(1), 133–153. +#' +#' - Brincks, A. M., Enders, C. K., Llabre, M. M., Bulotsky-Shearer, R. J., +#' Prado, G., and Feaster, D. J. (2017). Centering Predictor Variables in +#' Three-Level Contextual Models. Multivariate Behavioral Research, 52(2), +#' 149–163. https://doi.org/10.1080/00273171.2016.1256753 #' #' - Gelman A, Hill J. 2007. Data Analysis Using Regression and -#' Multilevel/Hierarchical Models. Analytical Methods for Social Research. -#' Cambridge, New York: Cambridge University Press +#' Multilevel/Hierarchical Models. Analytical Methods for Social Research. +#' Cambridge, New York: Cambridge University Press #' #' - Giesselmann M, Schmidt-Catran, AW. 2020. Interactions in fixed -#' effects regression models. Sociological Methods & Research, 1–28. -#' https://doi.org/10.1177/0049124120914934 +#' effects regression models. Sociological Methods & Research, 1–28. +#' https://doi.org/10.1177/0049124120914934 +#' +#' - Guo Y, Dhaliwal J, Rights JD. 2024. Disaggregating level-specific effects +#' in cross-classified multilevel models. Behavior Research Methods, 56(4), +#' 3023–3057. #' #' - Heisig JP, Schaeffer M, Giesecke J. 2017. The Costs of Simplicity: -#' Why Multilevel Models May Benefit from Accounting for Cross-Cluster -#' Differences in the Effects of Controls. American Sociological Review 82 -#' (4): 796–827. +#' Why Multilevel Models May Benefit from Accounting for Cross-Cluster +#' Differences in the Effects of Controls. American Sociological Review 82 +#' (4): 796–827. #' #' - Hoffman L. 2015. Longitudinal analysis: modeling within-person -#' fluctuation and change. New York: Routledge +#' fluctuation and change. 
New York: Routledge #' #' @examples #' @@ -223,21 +280,16 @@ demean <- function(x, select, by, + nested = FALSE, suffix_demean = "_within", suffix_groupmean = "_between", add_attributes = TRUE, - verbose = TRUE, - group = NULL) { - ## TODO: remove warning in future release - if (!is.null(group)) { - by <- group - insight::format_warning("Argument `group` is deprecated and will be removed in a future release. Please use `by` instead.") # nolint - } - + verbose = TRUE) { degroup( x = x, select = select, by = by, + nested = nested, center = "mean", suffix_demean = suffix_demean, suffix_groupmean = suffix_groupmean, @@ -247,47 +299,48 @@ demean <- function(x, } - - - - #' @rdname demean #' @export degroup <- function(x, select, by, + nested = FALSE, center = "mean", suffix_demean = "_within", suffix_groupmean = "_between", add_attributes = TRUE, - verbose = TRUE, - group = NULL) { - ## TODO: remove warning later - if (!is.null(group)) { - by <- group - insight::format_warning("Argument `group` is deprecated and will be removed in a future release. Please use `by` instead.") # nolint - } - + verbose = TRUE) { # ugly tibbles again... x <- .coerce_to_dataframe(x) center <- match.arg(tolower(center), choices = c("mean", "median", "mode", "min", "max")) if (inherits(select, "formula")) { - # formula to character, remove "~", split at "+" + # formula to character, remove "~", split at "+". 
We don't use `all.vars()` + # here because we want to keep the interaction terms as they are select <- trimws(unlist( strsplit(gsub("~", "", insight::safe_deparse(select), fixed = TRUE), "+", fixed = TRUE), use.names = FALSE )) } + # handle different "by" options if (inherits(by, "formula")) { by <- all.vars(by) } + # we also allow lme4-syntax here: if by = "L4/L3/L2", we assume a nested design + if (length(by) == 1 && grepl("/", by, fixed = TRUE)) { + by <- insight::trim_ws(unlist(strsplit(by, "/", fixed = TRUE), use.names = FALSE)) + nested <- TRUE + } + + # identify interaction terms interactions_no <- select[!grepl("(\\*|\\:)", select)] interactions_yes <- select[grepl("(\\*|\\:)", select)] + # if we have interaction terms that should be de-meaned, calculate the product + # of the terms first, then demean the product if (length(interactions_yes)) { interaction_terms <- lapply(strsplit(interactions_yes, "*", fixed = TRUE), trimws) product <- lapply(interaction_terms, function(i) do.call(`*`, x[, i])) @@ -296,20 +349,22 @@ degroup <- function(x, select <- c(interactions_no, colnames(new_dat)) } - not_found <- setdiff(select, colnames(x)) - - if (length(not_found) && isTRUE(verbose)) { - insight::format_alert( - sprintf( - "%i variables were not found in the dataset: %s\n", - length(not_found), - toString(not_found) - ) + # check if all variables are present + not_found <- setdiff(c(select, by), colnames(x)) + + if (length(not_found)) { + insight::format_error( + paste0( + "Variable", + ifelse(length(not_found) > 1, "s ", " "), + text_concatenate(not_found, enclose = "\""), + ifelse(length(not_found) > 1, " were", " was"), + " not found in the dataset." + ), + .misspelled_string(colnames(x), not_found, "Possibly misspelled or not yet defined?") ) } - select <- intersect(colnames(x), select) - # get data to demean... 
dat <- x[, c(select, by)] @@ -366,37 +421,92 @@ degroup <- function(x, max = function(.gm) max(.gm, na.rm = TRUE), function(.gm) mean(.gm, na.rm = TRUE) ) - x_gm_list <- lapply(select, function(i) { - stats::ave(dat[[i]], dat[[by]], FUN = gm_fun) - }) - names(x_gm_list) <- select - # create de-meaned variables by subtracting the group mean from each individual value + # we allow disaggregating level-specific effects for cross-classified multilevel + # models (see Guo et al. 2024). Two levels should work as proposed by the authors, + # more levels also already work, but need to check the formula from the paper + # and validate results - x_dm_list <- lapply(select, function(i) dat[[i]] - x_gm_list[[i]]) - names(x_dm_list) <- select + if (length(by) == 1) { + # simple case: one level + group_means_list <- lapply(select, function(i) { + stats::ave(dat[[i]], dat[[by]], FUN = gm_fun) + }) + names(group_means_list) <- select + # create de-meaned variables by subtracting the group mean from each individual value + person_means_list <- lapply(select, function(i) dat[[i]] - group_means_list[[i]]) + } else if (nested) { + # nested design: by > 1, nested is explicitly set to TRUE + # We want: + # L3_between = xbar(k) + # L2_between = xbar(j,k) - xbar(k) + # L1_within = x(ijk) - xbar(jk) + # , where + # x(ijk) is the individual value / variable that is measured on level 1 + # xbar(k) <- ave(x_ijk, L3, FUN = mean), the group mean of the variable at highest level + # xbar(jk) <- ave(x_ijk, L3, L2, FUN = mean), the group mean of the variable at second level + group_means_list <- lapply(select, function(i) { + out <- lapply(seq_along(by), function(k) { + dat$higher_levels <- do.call(paste, c(dat[by[1:k]], list(sep = "_"))) + stats::ave(dat[[i]], dat$higher_levels, FUN = gm_fun) + }) + # subtract mean of higher level from lower level + for (j in 2:length(by)) { + out[[j]] <- out[[j]] - out[[j - 1]] + } + names(out) <- paste0(select, "_", by) + out + }) + # create de-meaned variables 
by subtracting the group mean from each individual value + person_means_list <- lapply( + # seq_along(select), + # function(i) dat[[select[i]]] - group_means_list[[i]][[length(by)]] + select, + function(i) { + dat$higher_levels <- do.call(paste, c(dat[by], list(sep = "_"))) + dat[[i]] - stats::ave(dat[[i]], dat$higher_levels, FUN = gm_fun) + } + ) + } else { + # cross-classified design: by > 1 + group_means_list <- lapply(by, function(j) { + out <- lapply(select, function(i) { + stats::ave(dat[[i]], dat[[j]], FUN = gm_fun) + }) + names(out) <- paste0(select, "_", j) + out + }) + # de-meaned variables for cross-classified design is simply subtracting + # all group means from each individual value + person_means_list <- lapply(seq_along(select), function(i) { + sum_group_means <- do.call(`+`, lapply(group_means_list, function(j) j[[i]])) + dat[[select[i]]] - sum_group_means + }) + } + # preserve names + names(person_means_list) <- select # convert to data frame and add suffix to column names - x_gm <- as.data.frame(x_gm_list) - x_dm <- as.data.frame(x_dm_list) + group_means <- as.data.frame(group_means_list) + person_means <- as.data.frame(person_means_list) - colnames(x_dm) <- sprintf("%s%s", colnames(x_dm), suffix_demean) - colnames(x_gm) <- sprintf("%s%s", colnames(x_gm), suffix_groupmean) + colnames(person_means) <- sprintf("%s%s", colnames(person_means), suffix_demean) + colnames(group_means) <- sprintf("%s%s", colnames(group_means), suffix_groupmean) if (isTRUE(add_attributes)) { - x_dm[] <- lapply(x_dm, function(i) { + person_means[] <- lapply(person_means, function(i) { attr(i, "within-effect") <- TRUE i }) - x_gm[] <- lapply(x_gm, function(i) { + group_means[] <- lapply(group_means, function(i) { attr(i, "between-effect") <- TRUE i }) } - cbind(x_gm, x_dm) + cbind(group_means, person_means) } diff --git a/R/describe_distribution.R b/R/describe_distribution.R index 41f2a8b83..64f6e29c1 100644 --- a/R/describe_distribution.R +++ b/R/describe_distribution.R @@ 
-186,11 +186,24 @@ describe_distribution.numeric <- function(x, # Confidence Intervals if (!is.null(ci)) { insight::check_if_installed("boot") - results <- boot::boot( - data = x, - statistic = .boot_distribution, - R = iterations, - centrality = centrality + results <- tryCatch( + { + boot::boot( + data = x, + statistic = .boot_distribution, + R = iterations, + centrality = centrality + ) + }, + error = function(e) { + msg <- conditionMessage(e) + if (!is.null(msg) && msg == "sample is too sparse to find TD") { + insight::format_warning( + "When bootstrapping CIs, sample was too sparse to find TD. Returning NA for CIs." + ) + list(t = c(NA_real_, NA_real_)) + } + } ) out_ci <- bayestestR::ci(results$t, ci = ci, verbose = FALSE) out <- cbind(out, data.frame(CI_low = out_ci$CI_low[1], CI_high = out_ci$CI_high[1])) @@ -500,7 +513,7 @@ print.parameters_distribution <- function(x, digits = 2, ...) { ci_brackets = TRUE, ... ) - cat(insight::export_table(formatted_table, format = "text", digits = digits)) + cat(insight::export_table(formatted_table, format = "text", digits = digits, ...)) invisible(x) } diff --git a/R/descriptives.R b/R/descriptives.R index 097934d29..43479f697 100644 --- a/R/descriptives.R +++ b/R/descriptives.R @@ -77,7 +77,6 @@ coef_var.default <- function(x, verbose = TRUE, ...) { #' as the nearest endpoint. #' @param remove_na Logical. Should `NA` values be removed before computing (`TRUE`) #' or not (`FALSE`, default)? -#' @param na.rm Deprecated. Please use `remove_na` instead. #' @param n If `method = "unbiased"` and both `mu` and `sigma` are provided (not #' computed from `x`), what sample size to use to adjust the computed CV #' for small-sample bias? @@ -111,13 +110,7 @@ coef_var.default <- function(x, verbose = TRUE, ...) { #' @export coef_var.numeric <- function(x, mu = NULL, sigma = NULL, method = c("standard", "unbiased", "median_mad", "qcd"), - trim = 0, remove_na = FALSE, n = NULL, na.rm = FALSE, ...) 
{ - # TODO: remove deprecated argument later - if (!missing(na.rm)) { - insight::format_warning("Argument `na.rm` is deprecated. Please use `remove_na` instead.") - remove_na <- na.rm - } - + trim = 0, remove_na = FALSE, n = NULL, ...) { # TODO: Support weights if (!missing(x) && all(c(-1, 1) %in% sign(x))) { insight::format_error("Coefficient of variation only applicable for ratio scale variables.") diff --git a/R/extract_column_names.R b/R/extract_column_names.R index b89173a8c..a3d120d3f 100644 --- a/R/extract_column_names.R +++ b/R/extract_column_names.R @@ -9,8 +9,10 @@ #' tasks. Can be either #' #' - a variable specified as a literal variable name (e.g., `column_name`), -#' - a string with the variable name (e.g., `"column_name"`), or a character -#' vector of variable names (e.g., `c("col1", "col2", "col3")`), +#' - a string with the variable name (e.g., `"column_name"`), a character +#' vector of variable names (e.g., `c("col1", "col2", "col3")`), or a +#' character vector of variable names including ranges specified via `:` +#' (e.g., `c("col1:col3", "col5")`), #' - a formula with variable names (e.g., `~column_1 + column_2`), #' - a vector of positive integers, giving the positions counting from the left #' (e.g. 
`1` or `c(1, 3, 5)`), @@ -116,7 +118,7 @@ #' ``` #' #' @examples -#' # Find columns names by pattern +#' # Find column names by pattern #' extract_column_names(iris, starts_with("Sepal")) #' extract_column_names(iris, ends_with("Width")) #' extract_column_names(iris, regex("\\.")) @@ -129,6 +131,9 @@ #' numeric_mean_35 <- function(x) is.numeric(x) && mean(x, na.rm = TRUE) > 3.5 #' extract_column_names(iris, numeric_mean_35) #' +#' # find range of column names by range, using character vector +#' extract_column_names(mtcars, c("cyl:hp", "wt")) +#' +#' # rename returned columns for "data_select()" #' head(data_select(mtcars, c(`Miles per Gallon` = "mpg", Cylinders = "cyl"))) #' @export @@ -160,28 +165,6 @@ extract_column_names <- function(data, columns } - -#' @rdname extract_column_names -#' @export -data_find <- function(data, - select = NULL, - exclude = NULL, - ignore_case = FALSE, - regex = FALSE, - verbose = TRUE, - ...) { - insight::format_warning("Function `data_find()` is deprecated and will be removed in a future release. Please use `extract_column_names()` instead.") # nolint - extract_column_names( - data, - select = select, - exclude = exclude, - ignore_case = ignore_case, - regex = regex, - verbose = verbose, - ... - ) -} - #' @rdname extract_column_names #' @export -find_columns <- data_find +find_columns <- extract_column_names diff --git a/R/mean_sd.R b/R/mean_sd.R index d18473d8d..42ce9b523 100644 --- a/R/mean_sd.R +++ b/R/mean_sd.R @@ -20,23 +20,13 @@ #' median_mad(mtcars$mpg) #' #' @export -mean_sd <- function(x, times = 1L, remove_na = TRUE, named = TRUE, na.rm = TRUE, ...) { - # TODO: remove deprecated argument later - if (!missing(na.rm)) { - insight::format_warning("Argument `na.rm` is deprecated. Please use `remove_na` instead.") - remove_na <- na.rm - } +mean_sd <- function(x, times = 1L, remove_na = TRUE, named = TRUE, ...) 
{ .centrality_dispersion(x, type = "mean", times = times, remove_na = remove_na, named = named) } #' @export #' @rdname mean_sd -median_mad <- function(x, times = 1L, remove_na = TRUE, constant = 1.4826, named = TRUE, na.rm = TRUE, ...) { - # TODO: remove deprecated argument later - if (!missing(na.rm)) { - insight::format_warning("Argument `na.rm` is deprecated. Please use `remove_na` instead.") - remove_na <- na.rm - } +median_mad <- function(x, times = 1L, remove_na = TRUE, constant = 1.4826, named = TRUE, ...) { .centrality_dispersion(x, type = "median", times = times, remove_na = remove_na, constant = constant, named = named) } diff --git a/R/means_by_group.R b/R/means_by_group.R index ad188f275..39416bb11 100644 --- a/R/means_by_group.R +++ b/R/means_by_group.R @@ -19,7 +19,6 @@ #' @param digits Optional scalar, indicating the amount of digits after decimal #' point when rounding estimates and values. #' @param ... Currently not used -#' @param group Deprecated. Use `by` instead. #' @inheritParams find_columns #' #' @return A data frame with information on mean and further summary statistics @@ -60,14 +59,7 @@ means_by_group.numeric <- function(x, ci = 0.95, weights = NULL, digits = NULL, - group = NULL, ...) { - ## TODO: remove warning in future release - if (!is.null(group)) { - by <- group - insight::format_warning("Argument `group` is deprecated and will be removed in a future release. Please use `by` instead.") # nolint - } - # validation check for arguments # "by" must be provided @@ -139,14 +131,7 @@ means_by_group.data.frame <- function(x, ignore_case = FALSE, regex = FALSE, verbose = TRUE, - group = NULL, ...) { - ## TODO: remove warning in future release - if (!is.null(group)) { - by <- group - insight::format_warning("Argument `group` is deprecated and will be removed in a future release. 
Please use `by` instead.") # nolint - } - # evaluate select/exclude, may be select-helpers select <- .select_nse(select, x, diff --git a/R/recode_values.R b/R/recode_values.R index b4570bf44..a8e8d6d3b 100644 --- a/R/recode_values.R +++ b/R/recode_values.R @@ -527,35 +527,3 @@ recode_values.data.frame <- function(x, ok } - - -## TODO Deprecate and remove alias later - -#' @rdname recode_values -#' @export -change_code <- function(x, - select = NULL, - exclude = NULL, - recode = NULL, - default = NULL, - preserve_na = TRUE, - append = FALSE, - ignore_case = FALSE, - regex = FALSE, - verbose = TRUE, - ...) { - insight::format_warning("Function `change_code()` is deprecated. Please use `recode_values()` instead.") # nolint - recode_values( - x, - select = select, - exclude = exclude, - recode = recode, - default = default, - preserve_na = preserve_na, - append = append, - ignore_case = ignore_case, - regex = regex, - verbose = verbose, - ... - ) -} diff --git a/R/rescale_weights.R b/R/rescale_weights.R index 02aab1d2e..60d405c9d 100644 --- a/R/rescale_weights.R +++ b/R/rescale_weights.R @@ -20,7 +20,6 @@ #' @param nest Logical, if `TRUE` and `by` indicates at least two #' group variables, then groups are "nested", i.e. groups are now a #' combination from each group level of the variables in `by`. -#' @param group Deprecated. Use `by` instead. #' #' @return `data`, including the new weighting variables: `pweights_a` #' and `pweights_b`, which represent the rescaled design weights to use @@ -88,13 +87,7 @@ #' ) #' } #' @export -rescale_weights <- function(data, by, probability_weights, nest = FALSE, group = NULL) { - ## TODO: remove warning in future release - if (!is.null(group)) { - by <- group - insight::format_warning("Argument `group` is deprecated and will be removed in a future release. 
Please use `by` instead.") # nolint - } - +rescale_weights <- function(data, by, probability_weights, nest = FALSE) { if (inherits(by, "formula")) { by <- all.vars(by) } diff --git a/R/reshape_ci.R b/R/reshape_ci.R index 99a670a2d..dcfc729a8 100644 --- a/R/reshape_ci.R +++ b/R/reshape_ci.R @@ -43,15 +43,20 @@ reshape_ci <- function(x, ci_type = "CI") { # Reshape if (length(unique(x$CI)) > 1) { if ("Parameter" %in% names(x)) { + idvar <- "Parameter" remove_parameter <- FALSE - } else { + } else if (is.null(attr(x, "idvars"))) { + idvar <- "Parameter" x$Parameter <- NA remove_parameter <- TRUE + } else { + idvar <- attr(x, "idvars") + remove_parameter <- FALSE } x <- stats::reshape( x, - idvar = "Parameter", + idvar = idvar, timevar = "CI", direction = "wide", v.names = c(ci_low, ci_high), diff --git a/R/row_count.R b/R/row_count.R new file mode 100644 index 000000000..02b1c16dc --- /dev/null +++ b/R/row_count.R @@ -0,0 +1,124 @@ +#' @title Count specific values row-wise +#' @name row_count +#' @description `row_count()` mimics base R's `rowSums()`, with sums for a +#' specific value indicated by `count`. Hence, it is similar to +#' `rowSums(x == count, na.rm = TRUE)`, but offers some more options, including +#' strict comparisons. Comparisons using `==` coerce values to atomic vectors, +#' thus both `2 == 2` and `"2" == 2` are `TRUE`. In `row_count()`, it is also +#' possible to make "type safe" comparisons using the `allow_coercion` argument, +#' where `"2" == 2` is not true. +#' +#' @param data A data frame with at least two columns, where number of specific +#' values are counted row-wise. +#' @param count The value for which the row sum should be computed. May be a +#' numeric value, a character string (for factors or character vectors), `NA` or +#' `Inf`. +#' @param allow_coercion Logical. If `FALSE`, `count` matches only values of same +#' class (i.e. when `count = 2`, the value `"2"` is not counted and vice versa). 
+#' By default, when `allow_coercion = TRUE`, `count = 2` also matches `"2"`. In +#' order to count factor levels in the data, use `count = factor("level")`. See +#' 'Examples'. +#' +#' @inheritParams extract_column_names +#' @inheritParams row_means +#' +#' @return A vector with row-wise counts of values specified in `count`. +#' +#' @examples +#' dat <- data.frame( +#' c1 = c(1, 2, NA, 4), +#' c2 = c(NA, 2, NA, 5), +#' c3 = c(NA, 4, NA, NA), +#' c4 = c(2, 3, 7, 8) +#' ) +#' +#' # count all 4s per row +#' row_count(dat, count = 4) +#' # count all missing values per row +#' row_count(dat, count = NA) +#' +#' dat <- data.frame( +#' c1 = c("1", "2", NA, "3"), +#' c2 = c(NA, "2", NA, "3"), +#' c3 = c(NA, 4, NA, NA), +#' c4 = c(2, 3, 7, Inf) +#' ) +#' # count all 2s and "2"s per row +#' row_count(dat, count = 2) +#' # only count 2s, but not "2"s +#' row_count(dat, count = 2, allow_coercion = FALSE) +#' +#' dat <- data.frame( +#' c1 = factor(c("1", "2", NA, "3")), +#' c2 = c("2", "1", NA, "3"), +#' c3 = c(NA, 4, NA, NA), +#' c4 = c(2, 3, 7, Inf) +#' ) +#' # find only character "2"s +#' row_count(dat, count = "2", allow_coercion = FALSE) +#' # find only factor level "2"s +#' row_count(dat, count = factor("2"), allow_coercion = FALSE) +#' +#' @export +row_count <- function(data, + select = NULL, + exclude = NULL, + count = NULL, + allow_coercion = TRUE, + ignore_case = FALSE, + regex = FALSE, + verbose = TRUE) { + # evaluate arguments + select <- .select_nse(select, + data, + exclude, + ignore_case = ignore_case, + regex = regex, + verbose = verbose + ) + + if (is.null(count)) { + insight::format_error("`count` must be a valid value (including `NA` or `Inf`), but not `NULL`.") + } + + if (is.null(select) || length(select) == 0) { + insight::format_error("No columns selected.") + } + + data <- .coerce_to_dataframe(data[select]) + + # check if we have a data frame with at least one row + if (nrow(data) < 1) { + insight::format_error("`data` must be a data frame with at 
least one row.") + } + + # check if we have a data framme with at least two columns + if (ncol(data) < 2) { + insight::format_error("`data` must be a data frame with at least two numeric columns.") + } + # special case: count missing + if (is.na(count)) { + rowSums(is.na(data)) + } else { + # comparisons in R using == coerce values into a atomic vector, i.e. + # 2 == "2" is TRUE. If `allow_coercion = FALSE`, we only want 2 == 2 or + # "2" == "2" (i.e. we want exact types to be compared only) + if (isFALSE(allow_coercion)) { + # we need the "type" of the count-value - we use class() instead of typeof(), + # because the latter sometimes returns unsuitable classes/types. compare + # typeof(as.Date("2020-01-01")), which returns "double". + count_type <- class(count)[1] + valid_columns <- vapply(data, inherits, TRUE, what = count_type) + # check if any columns left? + if (!any(valid_columns)) { + insight::format_error("No column has same type as the value provided in `count`. Set `allow_coercion = TRUE` or specify a valid value for `count`.") # nolint + } + data <- data[valid_columns] + } + # coerce - we have only valid columns anyway, and we need to coerce factors + # to vectors, else comparison with `==` errors. + count <- as.vector(count) + # finally, count + rowSums(data == count, na.rm = TRUE) + } +} diff --git a/R/row_means.R b/R/row_means.R index 4d2876c6a..729c800be 100644 --- a/R/row_means.R +++ b/R/row_means.R @@ -1,15 +1,16 @@ -#' @title Row means (optionally with minimum amount of valid values) +#' @title Row means or sums (optionally with minimum amount of valid values) #' @name row_means -#' @description This function is similar to the SPSS `MEAN.n` function and computes -#' row means from a data frame or matrix if at least `min_valid` values of a row are -#' valid (and not `NA`). 
+#' @description This function is similar to the SPSS `MEAN.n` or `SUM.n` +#' function and computes row means or row sums from a data frame or matrix if at +#' least `min_valid` values of a row are valid (and not `NA`). #' -#' @param data A data frame with at least two columns, where row means are applied. +#' @param data A data frame with at least two columns, where row means or row +#' sums are applied. #' @param min_valid Optional, a numeric value of length 1. May either be #' - a numeric value that indicates the amount of valid values per row to -#' calculate the row mean; +#' calculate the row mean or row sum; #' - or a value between `0` and `1`, indicating a proportion of valid values per -#' row to calculate the row mean (see 'Details'). +#' row to calculate the row mean or row sum (see 'Details'). #' - `NULL` (default), in which all cases are considered. #' #' If a row's sum of valid values is less than `min_valid`, `NA` will be returned. @@ -17,21 +18,24 @@ #' used for rounding mean values. Negative values are allowed (see 'Details'). #' By default, `digits = NULL` and no rounding is used. #' @param remove_na Logical, if `TRUE` (default), removes missing (`NA`) values -#' before calculating row means. Only applies if `min_valuid` is not specified. +#' before calculating row means or row sums. Only applies if `min_valid` is not +#' specified. #' @param verbose Toggle warnings. #' @inheritParams extract_column_names #' -#' @return A vector with row means for those rows with at least `n` valid values. +#' @return A vector with row means (for `row_means()`) or row sums (for +#' `row_sums()`) for those rows with at least `n` valid values. #' -#' @details Rounding to a negative number of `digits` means rounding to a power of -#' ten, for example `row_means(df, 3, digits = -2)` rounds to the nearest hundred. -#' For `min_valid`, if not `NULL`, `min_valid` must be a numeric value from `0` -#' to `ncol(data)`. 
If a row in the data frame has at least `min_valid` -#' non-missing values, the row mean is returned. If `min_valid` is a non-integer -#' value from 0 to 1, `min_valid` is considered to indicate the proportion of -#' required non-missing values per row. E.g., if `min_valid = 0.75`, a row must -#' have at least `ncol(data) * min_valid` non-missing values for the row mean -#' to be calculated. See 'Examples'. +#' @details Rounding to a negative number of `digits` means rounding to a power +#' of ten, for example `row_means(df, 3, digits = -2)` rounds to the nearest +#' hundred. For `min_valid`, if not `NULL`, `min_valid` must be a numeric value +#' from `0` to `ncol(data)`. If a row in the data frame has at least `min_valid` +#' non-missing values, the row mean or row sum is returned. If `min_valid` is a +#' non-integer value from 0 to 1, `min_valid` is considered to indicate the +#' proportion of required non-missing values per row. E.g., if +#' `min_valid = 0.75`, a row must have at least `ncol(data) * min_valid` +#' non-missing values for the row mean or row sum to be calculated. See +#' 'Examples'. 
#' #' @examples #' dat <- data.frame( @@ -49,6 +53,7 @@ #' #' # needs at least 4 non-missing values per row #' row_means(dat, min_valid = 4) # 1 valid return value +#' row_sums(dat, min_valid = 4) # 1 valid return value #' #' # needs at least 3 non-missing values per row #' row_means(dat, min_valid = 3) # 2 valid return values @@ -61,6 +66,7 @@ #' #' # needs at least 50% of non-missing values per row #' row_means(dat, min_valid = 0.5) # 3 valid return values +#' row_sums(dat, min_valid = 0.5) #' #' # needs at least 75% of non-missing values per row #' row_means(dat, min_valid = 0.75) # 2 valid return values @@ -84,34 +90,52 @@ row_means <- function(data, verbose = verbose ) - if (is.null(select) || length(select) == 0) { - insight::format_error("No columns selected.") - } + # prepare data, sanity checks + data <- .prepare_row_data(data, select, min_valid, verbose) - data <- .coerce_to_dataframe(data[select]) + # calculate row means + .row_sums_or_means(data, min_valid, digits, remove_na, fun = "mean") +} - # n must be a numeric, non-missing value - if (!is.null(min_valid) && (all(is.na(min_valid)) || !is.numeric(min_valid) || length(min_valid) > 1)) { - insight::format_error("`min_valid` must be a numeric value of length 1.") - } - # make sure we only have numeric values - numeric_columns <- vapply(data, is.numeric, TRUE) - if (!all(numeric_columns)) { - if (verbose) { - insight::format_alert("Only numeric columns are considered for calculation.") - } - data <- data[numeric_columns] - } +#' @rdname row_means +#' @export +row_sums <- function(data, + select = NULL, + exclude = NULL, + min_valid = NULL, + digits = NULL, + ignore_case = FALSE, + regex = FALSE, + remove_na = FALSE, + verbose = TRUE) { + # evaluate arguments + select <- .select_nse(select, + data, + exclude, + ignore_case = ignore_case, + regex = regex, + verbose = verbose + ) + + # prepare data, sanity checks + data <- .prepare_row_data(data, select, min_valid, verbose) + + # calculate row sums + 
.row_sums_or_means(data, min_valid, digits, remove_na, fun = "sum") +} - # check if we have a data framme with at least two columns - if (ncol(data) < 2) { - insight::format_error("`data` must be a data frame with at least two numeric columns.") - } - # proceed here if min_valid is not NULL +# helper ------------------------ + +# calculate row means or sums +.row_sums_or_means <- function(data, min_valid, digits, remove_na, fun) { if (is.null(min_valid)) { - out <- rowMeans(data, na.rm = remove_na) + # calculate row means or sums for complete data + out <- switch(fun, + mean = rowMeans(data, na.rm = remove_na), + rowSums(data, na.rm = remove_na) + ) } else { # is 'min_valid' indicating a proportion? decimals <- min_valid %% 1 @@ -124,9 +148,12 @@ row_means <- function(data, insight::format_error("`min_valid` must be smaller or equal to number of columns in data frame.") } - # row means + # row means or sums to_na <- rowSums(is.na(data)) > ncol(data) - min_valid - out <- rowMeans(data, na.rm = TRUE) + out <- switch(fun, + mean = rowMeans(data, na.rm = TRUE), + rowSums(data, na.rm = TRUE) + ) out[to_na] <- NA } @@ -137,3 +164,34 @@ row_means <- function(data, out } + + +# check that data is in shape for row means or row sums +.prepare_row_data <- function(data, select, min_valid, verbose) { + if (is.null(select) || length(select) == 0) { + insight::format_error("No columns selected.") + } + + data <- .coerce_to_dataframe(data[select]) + + # n must be a numeric, non-missing value + if (!is.null(min_valid) && (all(is.na(min_valid)) || !is.numeric(min_valid) || length(min_valid) > 1)) { + insight::format_error("`min_valid` must be a numeric value of length 1.") + } + + # make sure we only have numeric values + numeric_columns <- vapply(data, is.numeric, TRUE) + if (!all(numeric_columns)) { + if (verbose) { + insight::format_alert("Only numeric columns are considered for calculation.") + } + data <- data[numeric_columns] + } + + # check if we have a data framme with at 
least two columns + if (ncol(data) < 2) { + insight::format_error("`data` must be a data frame with at least two numeric columns.") + } + + data +} diff --git a/R/select_nse.R b/R/select_nse.R index 8f9eba096..5120691a9 100644 --- a/R/select_nse.R +++ b/R/select_nse.R @@ -139,6 +139,7 @@ # Possibilities: # - quoted variable name # - quoted variable name with ignore case +# - quoted variable name with colon, to indicate range # - character that should be regex-ed on variable names # - special word "all" to return all vars @@ -146,31 +147,63 @@ # use colnames because names() doesn't work for matrices columns <- colnames(data) if (isTRUE(regex)) { + # string is a regular expression grep(x, columns) } else if (length(x) == 1L && x == "all") { + # string is "all" - select all columns seq_along(data) + } else if (any(grepl(":", x, fixed = TRUE))) { + # special pattern, as string (e.g.select = c("cyl:hp", "am")). However, + # this will first go into `.eval_call()` and thus only single elements + # are passed in `x` - we have never a character *vector* here + # check for valid names + colon_vars <- unlist(strsplit(x, ":", fixed = TRUE)) + colon_match <- match(colon_vars, columns) + if (anyNA(colon_match)) { + .warn_not_found(colon_vars, columns, colon_match, verbose) + matches <- NA + } else { + start_pos <- match(colon_vars[1], columns) + end_pos <- match(colon_vars[2], columns) + if (!is.na(start_pos) && !is.na(end_pos)) { + matches <- start_pos:end_pos + } else { + matches <- NA + } + } + matches[!is.na(matches)] } else if (isTRUE(ignore_case)) { + # find columns, case insensitive matches <- match(toupper(x), toupper(columns)) matches[!is.na(matches)] } else { + # find columns, case sensitive matches <- match(x, columns) - if (anyNA(matches) && verbose) { - insight::format_warning( - paste0( - "Following variable(s) were not found: ", - toString(x[is.na(matches)]) - ), - .misspelled_string( - columns, - x[is.na(matches)], - default_message = "Possibly misspelled?" 
- ) - ) + if (anyNA(matches)) { + .warn_not_found(x, columns, matches, verbose) } matches[!is.na(matches)] } } +# small helper, to avoid duplicated code +.warn_not_found <- function(x, columns, matches, verbose = TRUE) { + if (verbose) { + insight::format_warning( + paste0( + "Following variable(s) were not found: ", + toString(x[is.na(matches)]) + ), + .misspelled_string( + columns, + x[is.na(matches)], + default_message = "Possibly misspelled?" + ) + ) + } +} + + # 3 types of symbols: # - unquoted variables # - objects that need to be evaluated, e.g data_find(iris, i) where diff --git a/R/skewness_kurtosis.R b/R/skewness_kurtosis.R index 6142c59ad..23ced0a04 100644 --- a/R/skewness_kurtosis.R +++ b/R/skewness_kurtosis.R @@ -110,15 +110,7 @@ skewness.numeric <- function(x, type = "2", iterations = NULL, verbose = TRUE, - na.rm = TRUE, ...) { - # TODO: remove deprecated argument later - if (!missing(na.rm)) { - # TODO: add deprecation warning in a later update - insight::format_warning("Argument `na.rm` is deprecated and will be removed in a future release. Please use `remove_na` instead.") # nolint - remove_na <- na.rm - } - if (remove_na) x <- x[!is.na(x)] n <- length(x) out <- (sum((x - mean(x))^3) / n) / (sum((x - mean(x))^2) / n)^1.5 @@ -177,15 +169,7 @@ skewness.matrix <- function(x, remove_na = TRUE, type = "2", iterations = NULL, - na.rm = TRUE, ...) { - # TODO: remove deprecated argument later - if (!missing(na.rm)) { - # TODO: add deprecation warning in a later update - insight::format_warning("Argument `na.rm` is deprecated and will be removed in a future release. Please use `remove_na` instead.") # nolint - remove_na <- na.rm - } - .skewness <- apply( x, 2, @@ -213,15 +197,7 @@ skewness.data.frame <- function(x, remove_na = TRUE, type = "2", iterations = NULL, - na.rm = TRUE, ...) 
{ - # TODO: remove deprecated argument later - if (!missing(na.rm)) { - # TODO: add deprecation warning in a later update - insight::format_warning("Argument `na.rm` is deprecated and will be removed in a future release. Please use `remove_na` instead.") # nolint - remove_na <- na.rm - } - .skewness <- lapply(x, skewness, remove_na = remove_na, @@ -241,15 +217,7 @@ skewness.default <- function(x, remove_na = TRUE, type = "2", iterations = NULL, - na.rm = TRUE, ...) { - # TODO: remove deprecated argument later - if (!missing(na.rm)) { - # TODO: add deprecation warning in a later update - insight::format_warning("Argument `na.rm` is deprecated and will be removed in a future release. Please use `remove_na` instead.") # nolint - remove_na <- na.rm - } - skewness( .factor_to_numeric(x), remove_na = remove_na, @@ -277,15 +245,7 @@ kurtosis.numeric <- function(x, type = "2", iterations = NULL, verbose = TRUE, - na.rm = TRUE, ...) { - # TODO: remove deprecated argument later - if (!missing(na.rm)) { - # TODO: add deprecation warning in a later update - insight::format_warning("Argument `na.rm` is deprecated and will be removed in a future release. Please use `remove_na` instead.") # nolint - remove_na <- na.rm - } - if (remove_na) x <- x[!is.na(x)] n <- length(x) out <- n * sum((x - mean(x))^4) / (sum((x - mean(x))^2)^2) @@ -342,15 +302,7 @@ kurtosis.matrix <- function(x, remove_na = TRUE, type = "2", iterations = NULL, - na.rm = TRUE, ...) { - # TODO: remove deprecated argument later - if (!missing(na.rm)) { - # TODO: add deprecation warning in a later update - insight::format_warning("Argument `na.rm` is deprecated and will be removed in a future release. Please use `remove_na` instead.") # nolint - remove_na <- na.rm - } - .kurtosis <- apply( x, 2, @@ -374,15 +326,7 @@ kurtosis.data.frame <- function(x, remove_na = TRUE, type = "2", iterations = NULL, - na.rm = TRUE, ...) 
{ - # TODO: remove deprecated argument later - if (!missing(na.rm)) { - # TODO: add deprecation warning in a later update - insight::format_warning("Argument `na.rm` is deprecated and will be removed in a future release. Please use `remove_na` instead.") # nolint - remove_na <- na.rm - } - .kurtosis <- lapply(x, kurtosis, remove_na = remove_na, @@ -400,15 +344,7 @@ kurtosis.default <- function(x, remove_na = TRUE, type = "2", iterations = NULL, - na.rm = TRUE, ...) { - # TODO: remove deprecated argument later - if (!missing(na.rm)) { - # TODO: add deprecation warning in a later update - insight::format_warning("Argument `na.rm` is deprecated and will be removed in a future release. Please use `remove_na` instead.") # nolint - remove_na <- na.rm - } - kurtosis( .factor_to_numeric(x), remove_na = remove_na, diff --git a/R/standardize.models.R b/R/standardize.models.R index 6f5a1dfa8..cf6062c78 100644 --- a/R/standardize.models.R +++ b/R/standardize.models.R @@ -78,6 +78,14 @@ standardize.default <- function(x, return(x) } + # check model formula. Some notations don't work when standardizing data + insight::formula_ok( + x, + action = "error", + prefix_msg = "Model cannot be standardized.", + verbose = verbose + ) + data_std <- NULL # needed to avoid note .standardize_models(x, robust = robust, two_sd = two_sd, @@ -197,7 +205,7 @@ standardize.default <- function(x, ## ---- STANDARDIZE! 
---- - w <- insight::get_weights(x, na_rm = TRUE) + w <- insight::get_weights(x, remove_na = TRUE) data_std <- standardize(data[do_standardize], robust = robust, diff --git a/R/text_format.R b/R/text_format.R index afdf4f861..0fa75bcac 100644 --- a/R/text_format.R +++ b/R/text_format.R @@ -42,15 +42,6 @@ text_format <- function(text, sep = ", ", last = " and ", width = NULL, enclose text_wrap(text_concatenate(text, sep = sep, last = last, enclose = enclose), width = width) } -## TODO Deprecate and remove alias later - -#' @rdname text_format -#' @export -format_text <- function(text, sep = ", ", last = " and ", width = NULL, enclose = NULL, ...) { - insight::format_warning("Function `format_text()` is deprecated and will be removed in a future release. Please use `text_format()` instead.") # nolint - text_format(text, sep = sep, last = last, width = width, enclose = enclose, ...) -} - #' @rdname text_format #' @export text_fullstop <- function(text) { diff --git a/R/to_numeric.R b/R/to_numeric.R index e38e12e80..3e75bccbd 100644 --- a/R/to_numeric.R +++ b/R/to_numeric.R @@ -17,11 +17,11 @@ #' @inheritParams extract_column_names #' @inheritParams categorize #' -#' @note By default, `to_numeric()` converts factors into "binary" dummies, i.e. +#' @note When factors should be converted into multiple "binary" dummies, i.e. #' each factor level is converted into a separate column filled with a binary -#' 0-1 value. If only one column is required, use `dummy_factors = FALSE`. If -#' you want to preserve the original factor levels (in case these represent -#' numeric values), use `preserve_levels = TRUE`. +#' 0-1 value, set `dummy_factors = TRUE`. If you want to preserve the original +#' factor levels (in case these represent numeric values), use +#' `preserve_levels = TRUE`. 
#' #' @section Selection of variables - `select` argument: #' For most functions that have a `select` argument the complete input data @@ -34,12 +34,12 @@ #' #' @examples #' to_numeric(head(ToothGrowth)) -#' to_numeric(head(ToothGrowth), dummy_factors = FALSE) +#' to_numeric(head(ToothGrowth), dummy_factors = TRUE) #' #' # factors #' x <- as.factor(mtcars$gear) -#' to_numeric(x, dummy_factors = FALSE) -#' to_numeric(x, dummy_factors = FALSE, preserve_levels = TRUE) +#' to_numeric(x) +#' to_numeric(x, preserve_levels = TRUE) #' # same as: #' coerce_to_numeric(x) #' @@ -69,7 +69,7 @@ to_numeric.default <- function(x, verbose = TRUE, ...) { to_numeric.data.frame <- function(x, select = NULL, exclude = NULL, - dummy_factors = TRUE, + dummy_factors = FALSE, preserve_levels = FALSE, lowest = NULL, append = FALSE, @@ -191,7 +191,7 @@ to_numeric.POSIXlt <- to_numeric.Date #' @export to_numeric.factor <- function(x, - dummy_factors = TRUE, + dummy_factors = FALSE, preserve_levels = FALSE, lowest = NULL, verbose = TRUE, diff --git a/README.Rmd b/README.Rmd index ec0d01df7..39b8825ad 100644 --- a/README.Rmd +++ b/README.Rmd @@ -19,7 +19,7 @@ library(datawizard) [![DOI](https://joss.theoj.org/papers/10.21105/joss.04684/status.svg)](https://doi.org/10.21105/joss.04684) [![downloads](http://cranlogs.r-pkg.org/badges/datawizard)](https://cran.r-project.org/package=datawizard) -[![total](https://cranlogs.r-pkg.org/badges/grand-total/datawizard)](https://cranlogs.r-pkg.org/) [![lifecycle](https://img.shields.io/badge/lifecycle-maturing-blue.svg)](https://lifecycle.r-lib.org/articles/stages.html) +[![total](https://cranlogs.r-pkg.org/badges/grand-total/datawizard)](https://cranlogs.r-pkg.org/) diff --git a/README.md b/README.md index 712449df9..dd046ca12 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,6 @@ [![DOI](https://joss.theoj.org/papers/10.21105/joss.04684/status.svg)](https://doi.org/10.21105/joss.04684) 
[![downloads](http://cranlogs.r-pkg.org/badges/datawizard)](https://cran.r-project.org/package=datawizard) [![total](https://cranlogs.r-pkg.org/badges/grand-total/datawizard)](https://cranlogs.r-pkg.org/) -[![lifecycle](https://img.shields.io/badge/lifecycle-maturing-blue.svg)](https://lifecycle.r-lib.org/articles/stages.html) @@ -50,11 +49,11 @@ It covers two aspects of data preparation: badge](https://easystats.r-universe.dev/badges/datawizard)](https://easystats.r-universe.dev) [![R-CMD-check](https://github.com/easystats/datawizard/workflows/R-CMD-check/badge.svg?branch=main)](https://github.com/easystats/datawizard/actions) -| Type | Source | Command | -|----|----|----| -| Release | CRAN | `install.packages("datawizard")` | +| Type | Source | Command | +|-------------|------------|------------------------------------------------------------------------------| +| Release | CRAN | `install.packages("datawizard")` | | Development | r-universe | `install.packages("datawizard", repos = "https://easystats.r-universe.dev")` | -| Development | GitHub | `remotes::install_github("easystats/datawizard")` | +| Development | GitHub | `remotes::install_github("easystats/datawizard")` | > **Tip** > @@ -71,9 +70,10 @@ To cite the package, run the following command: citation("datawizard") To cite package 'datawizard' in publications use: - Patil et al., (2022). datawizard: An R Package for Easy Data - Preparation and Statistical Transformations. Journal of Open Source - Software, 7(78), 4684, https://doi.org/10.21105/joss.04684 + Patil et al., (2022). datawizard: An R Package for Easy + Data Preparation and Statistical Transformations. 
Journal + of Open Source Software, 7(78), 4684, + https://doi.org/10.21105/joss.04684 A BibTeX entry for LaTeX users is @@ -136,9 +136,6 @@ columns, can be achieved using `extract_column_names()` or # find column names matching a pattern extract_column_names(iris, starts_with("Sepal")) #> [1] "Sepal.Length" "Sepal.Width" -``` - -``` r # return data columns matching a pattern data_select(iris, starts_with("Sepal")) |> head() @@ -156,10 +153,8 @@ It is also possible to extract one or more variables: ``` r # single variable data_extract(mtcars, "gear") -#> [1] 4 4 4 3 3 3 3 4 4 4 4 3 3 3 3 3 3 4 4 4 3 3 3 3 3 4 5 5 5 5 5 4 -``` - -``` r +#> [1] 4 4 4 3 3 3 3 4 4 4 4 3 3 3 3 3 3 4 4 4 3 3 3 3 3 4 5 5 5 5 5 +#> [32] 4 # more variables head(data_extract(iris, ends_with("Width"))) @@ -220,17 +215,11 @@ x #> 1 1 a 5 1 #> 2 2 b 6 2 #> 3 3 c 7 3 -``` - -``` r y #> c d e id #> 1 6 f 100 2 #> 2 7 g 101 3 #> 3 8 h 102 4 -``` - -``` r data_merge(x, y, join = "full") #> a b c id d e @@ -238,50 +227,32 @@ data_merge(x, y, join = "full") #> 1 2 b 6 2 f 100 #> 2 3 c 7 3 g 101 #> 4 NA 8 4 h 102 -``` - -``` r data_merge(x, y, join = "left") #> a b c id d e #> 3 1 a 5 1 NA #> 1 2 b 6 2 f 100 #> 2 3 c 7 3 g 101 -``` - -``` r data_merge(x, y, join = "right") #> a b c id d e #> 1 2 b 6 2 f 100 #> 2 3 c 7 3 g 101 #> 3 NA 8 4 h 102 -``` - -``` r data_merge(x, y, join = "semi", by = "c") #> a b c id #> 2 2 b 6 2 #> 3 3 c 7 3 -``` - -``` r data_merge(x, y, join = "anti", by = "c") #> a b c id #> 1 1 a 5 1 -``` - -``` r data_merge(x, y, join = "inner") #> a b c id d e #> 1 2 b 6 2 f 100 #> 2 3 c 7 3 g 101 -``` - -``` r data_merge(x, y, join = "bind") #> a b c id d e @@ -322,17 +293,28 @@ data_to_wide(long_data, values_from = "value", id_cols = "Row_ID" ) -#> Row_ID X1 X2 X3 X4 X5 -#> 1 1 -0.08281164 -1.12490028 -0.70632036 -0.7027895 0.07633326 -#> 2 2 1.93468099 -0.87430362 0.96687656 0.2998642 -0.23035595 -#> 3 3 -2.05128979 0.04386162 -0.71016648 1.1494697 0.31746484 -#> 4 4 0.27773897 
-0.58397514 -0.05917365 -0.3016415 -1.59268440 -#> 5 5 -1.52596060 -0.82329858 -0.23094342 -0.5473394 -0.18194062 -#> 6 6 -0.26916362 0.11059280 0.69200045 -0.3854041 1.75614174 -#> 7 7 1.23305388 0.36472778 1.35682290 0.2763720 0.11394932 -#> 8 8 0.63360774 0.05370100 1.78872284 0.1518608 -0.29216508 -#> 9 9 0.35271746 1.36867235 0.41071582 -0.4313808 1.75409316 -#> 10 10 -0.56048248 -0.38045724 -2.18785470 -1.8705001 1.80958455 +#> Row_ID X1 X2 X3 X4 +#> 1 1 -0.08281164 -1.12490028 -0.70632036 -0.7027895 +#> 2 2 1.93468099 -0.87430362 0.96687656 0.2998642 +#> 3 3 -2.05128979 0.04386162 -0.71016648 1.1494697 +#> 4 4 0.27773897 -0.58397514 -0.05917365 -0.3016415 +#> 5 5 -1.52596060 -0.82329858 -0.23094342 -0.5473394 +#> 6 6 -0.26916362 0.11059280 0.69200045 -0.3854041 +#> 7 7 1.23305388 0.36472778 1.35682290 0.2763720 +#> 8 8 0.63360774 0.05370100 1.78872284 0.1518608 +#> 9 9 0.35271746 1.36867235 0.41071582 -0.4313808 +#> 10 10 -0.56048248 -0.38045724 -2.18785470 -1.8705001 +#> X5 +#> 1 0.07633326 +#> 2 -0.23035595 +#> 3 0.31746484 +#> 4 -1.59268440 +#> 5 -0.18194062 +#> 6 1.75614174 +#> 7 0.11394932 +#> 8 -0.29216508 +#> 9 1.75409316 +#> 10 1.80958455 ``` ### Empty rows and columns @@ -352,22 +334,13 @@ tmp #> 3 3 3 NA 3 #> 4 NA NA NA NA #> 5 5 5 NA 5 -``` - -``` r # indices of empty columns or rows empty_columns(tmp) #> c #> 3 -``` - -``` r empty_rows(tmp) #> [1] 4 -``` - -``` r # remove empty columns or rows remove_empty_columns(tmp) @@ -377,18 +350,12 @@ remove_empty_columns(tmp) #> 3 3 3 3 #> 4 NA NA NA #> 5 5 5 5 -``` - -``` r remove_empty_rows(tmp) #> a b c d #> 1 1 1 NA 1 #> 2 2 NA NA NA #> 3 3 3 NA 3 #> 5 5 5 NA 5 -``` - -``` r # remove empty columns and rows remove_empty(tmp) @@ -409,9 +376,6 @@ table(x) #> x #> 1 2 3 4 5 6 7 8 9 10 #> 2 3 5 3 7 5 5 2 11 7 -``` - -``` r # cut into 3 groups, based on distribution (quantiles) table(categorize(x, split = "quantile", n_groups = 3)) @@ -445,26 +409,23 @@ summary(swiss) #> Mean : 41.144 Mean :19.94 #> 3rd Qu.: 
93.125 3rd Qu.:21.70 #> Max. :100.000 Max. :26.60 -``` - -``` r # after summary(standardize(swiss)) -#> Fertility Agriculture Examination Education -#> Min. :-2.81327 Min. :-2.1778 Min. :-1.69084 Min. :-1.0378 -#> 1st Qu.:-0.43569 1st Qu.:-0.6499 1st Qu.:-0.56273 1st Qu.:-0.5178 -#> Median : 0.02061 Median : 0.1515 Median :-0.06134 Median :-0.3098 -#> Mean : 0.00000 Mean : 0.0000 Mean : 0.00000 Mean : 0.0000 -#> 3rd Qu.: 0.66504 3rd Qu.: 0.7481 3rd Qu.: 0.69074 3rd Qu.: 0.1062 -#> Max. : 1.78978 Max. : 1.7190 Max. : 2.57094 Max. : 4.3702 -#> Catholic Infant.Mortality -#> Min. :-0.9350 Min. :-3.13886 -#> 1st Qu.:-0.8620 1st Qu.:-0.61543 -#> Median :-0.6235 Median : 0.01972 -#> Mean : 0.0000 Mean : 0.00000 -#> 3rd Qu.: 1.2464 3rd Qu.: 0.60337 -#> Max. : 1.4113 Max. : 2.28566 +#> Fertility Agriculture Examination +#> Min. :-2.81327 Min. :-2.1778 Min. :-1.69084 +#> 1st Qu.:-0.43569 1st Qu.:-0.6499 1st Qu.:-0.56273 +#> Median : 0.02061 Median : 0.1515 Median :-0.06134 +#> Mean : 0.00000 Mean : 0.0000 Mean : 0.00000 +#> 3rd Qu.: 0.66504 3rd Qu.: 0.7481 3rd Qu.: 0.69074 +#> Max. : 1.78978 Max. : 1.7190 Max. : 2.57094 +#> Education Catholic Infant.Mortality +#> Min. :-1.0378 Min. :-0.9350 Min. :-3.13886 +#> 1st Qu.:-0.5178 1st Qu.:-0.8620 1st Qu.:-0.61543 +#> Median :-0.3098 Median :-0.6235 Median : 0.01972 +#> Mean : 0.0000 Mean : 0.0000 Mean : 0.00000 +#> 3rd Qu.: 0.1062 3rd Qu.: 1.2464 3rd Qu.: 0.60337 +#> Max. : 4.3702 Max. : 1.4113 Max. 
: 2.28566 ``` ### Winsorize @@ -486,9 +447,6 @@ anscombe #> 9 12 12 12 8 10.84 9.13 8.15 5.56 #> 10 7 7 7 8 4.82 7.26 6.42 7.91 #> 11 5 5 5 8 5.68 4.74 5.73 6.89 -``` - -``` r # after winsorize(anscombe) @@ -540,9 +498,6 @@ head(trees) #> 4 10.5 72 16.4 #> 5 10.7 81 18.8 #> 6 10.8 83 19.7 -``` - -``` r # after head(ranktransform(trees)) @@ -575,9 +530,6 @@ x #> Mazda RX4 21.0 6 160 110 #> Mazda RX4 Wag 21.0 6 160 110 #> Datsun 710 22.8 4 108 93 -``` - -``` r data_rotate(x) #> Mazda RX4 Mazda RX4 Wag Datsun 710 diff --git a/cran-comments.md b/cran-comments.md index 58de89d2a..095f22e9a 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -4,7 +4,8 @@ ## revdepcheck results -We checked 17 reverse dependencies, comparing R CMD check results across CRAN and dev versions of this package. +We checked 18 reverse dependencies, comparing R CMD check results across CRAN and dev versions of this package. * We saw 0 new problems * We failed to check 0 packages + diff --git a/inst/WORDLIST b/inst/WORDLIST index a3dd80b42..eda7dc71c 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -2,24 +2,31 @@ Analysing Asparouhov BMC Bafumi +Brincks +Bulotsky CMD Carle Catran Crosstables +Dhaliwal +Disaggregating DOI De Dom EFC +Enders EUROFAMCARE Fairbrother GLMM Gelman Giesecke Giesselmann +Guo Heisig Herrington Hoffmann Joanes +Llabre Lumley MADs Mattan @@ -79,6 +86,7 @@ midhinge modelbased modelling nd +panelr partialization patilindrajeets platykurtic diff --git a/man/adjust.Rd b/man/adjust.Rd index 64e50d9d3..48b321b8f 100644 --- a/man/adjust.Rd +++ b/man/adjust.Rd @@ -43,8 +43,10 @@ out). If \code{NULL} (the default), all variables will be selected.} tasks. 
Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/assign_labels.Rd b/man/assign_labels.Rd index cca14cc85..e6fd24252 100644 --- a/man/assign_labels.Rd +++ b/man/assign_labels.Rd @@ -38,8 +38,10 @@ labels are omitted.} tasks. Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/categorize.Rd b/man/categorize.Rd index 28f823dd4..dbecbf5e6 100644 --- a/man/categorize.Rd +++ b/man/categorize.Rd @@ -14,6 +14,7 @@ categorize(x, ...) n_groups = NULL, range = NULL, lowest = 1, + breaks = "exclusive", labels = NULL, verbose = TRUE, ... @@ -27,6 +28,7 @@ categorize(x, ...) 
n_groups = NULL, range = NULL, lowest = 1, + breaks = "exclusive", labels = NULL, append = FALSE, ignore_case = FALSE, @@ -67,10 +69,19 @@ for numeric variables, the minimum of the original input is preserved. For factors, the default minimum is \code{1}. For \code{split = "equal_range"}, the default minimum is always \code{1}, unless specified otherwise in \code{lowest}.} +\item{breaks}{Character, indicating whether breaks for categorizing data are +\code{"inclusive"} (values indicate the \emph{upper} bound of the \emph{previous} group or +interval) or \code{"exclusive"} (values indicate the \emph{lower} bound of the \emph{next} +group or interval to begin). Use \code{labels = "range"} to make this behaviour +easier to see.} + \item{labels}{Character vector of value labels. If not \code{NULL}, \code{categorize()} will returns factors instead of numeric variables, with \code{labels} used -for labelling the factor levels. Can also be \code{"mean"} or \code{"median"} for a -factor with labels as the mean/median of each groups.} +for labelling the factor levels. Can also be \code{"mean"}, \code{"median"}, +\code{"range"} or \code{"observed"} for a factor with labels as the mean/median, +the requested range (even if not all values of that range are present in +the data) or observed range (range of the actual recoded values) of each +group. See 'Examples'.} \item{verbose}{Toggle warnings.} @@ -78,8 +89,10 @@ factor with labels as the mean/median of each groups.} tasks. 
Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), @@ -145,7 +158,7 @@ It is basically a wrapper around base R's \code{cut()}, providing a simplified and more accessible way to define the interval breaks (cut-off values). } \section{Splits and breaks (cut-off values)}{ -Breaks are in general \emph{exclusive}, this means that these values indicate +Breaks are by default \emph{exclusive}, this means that these values indicate the lower bound of the next group or interval to begin. Take a simple example, a numeric variable with values from 1 to 9. The median would be 5, thus the first interval ranges from 1-4 and is recoded into 1, while 5-9 @@ -154,6 +167,9 @@ using \code{split = "quantile"} and \code{n_groups = 3} would define breaks at 3 and 6.33 (see \code{quantile(1:9, probs = c(1/3, 2/3))}), which means that values from 1 to 3 belong to the first interval and are recoded into 1 (because the next interval starts at 3.67), 4 to 6 into 2 and 7 to 9 into 3. 
+ +The opposite behaviour can be achieved using \code{breaks = "inclusive"}, in which +case values indicate the \emph{upper} bound of the \emph{previous} group or +interval to end. } \section{Recoding into groups with equal size or range}{ @@ -217,6 +233,13 @@ categorize(x, "equal_length", n_groups = 3, labels = c("low", "mid", "high")) x <- sample(1:10, size = 30, replace = TRUE) categorize(x, "equal_length", n_groups = 3, labels = "mean") categorize(x, "equal_length", n_groups = 3, labels = "median") + +# cut numeric into groups with the requested range as a label name +# each category has the same range, and labels indicate this range +categorize(mtcars$mpg, "equal_length", n_groups = 5, labels = "range") +# in this example, each category has the same range, but labels only refer +# to the ranges of the actual values (present in the data) inside each group +categorize(mtcars$mpg, "equal_length", n_groups = 5, labels = "observed") } \seealso{ \itemize{ diff --git a/man/center.Rd b/man/center.Rd index f143f64b2..4774020ab 100644 --- a/man/center.Rd +++ b/man/center.Rd @@ -72,8 +72,10 @@ against the names of the selected variables.} tasks. Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/coef_var.Rd b/man/coef_var.Rd index 0f0965076..2ff973838 100644 --- a/man/coef_var.Rd +++ b/man/coef_var.Rd @@ -19,7 +19,6 @@ distribution_coef_var(x, ...)
trim = 0, remove_na = FALSE, n = NULL, - na.rm = FALSE, ... ) } @@ -52,8 +51,6 @@ or not (\code{FALSE}, default)?} \item{n}{If \code{method = "unbiased"} and both \code{mu} and \code{sigma} are provided (not computed from \code{x}), what sample size to use to adjust the computed CV for small-sample bias?} - -\item{na.rm}{Deprecated. Please use \code{remove_na} instead.} } \value{ The computed coefficient of variation for \code{x}. @@ -79,14 +76,10 @@ This means that CV is \strong{NOT} invariant to shifting, but it is to scaling: \if{html}{\out{
}}\preformatted{sandwiches <- c(0, 4, 15, 0, 0, 5, 2, 7) coef_var(sandwiches) #> [1] 1.239094 -}\if{html}{\out{
}} -\if{html}{\out{
}}\preformatted{ coef_var(sandwiches / 2) # same #> [1] 1.239094 -}\if{html}{\out{
}} -\if{html}{\out{
}}\preformatted{ coef_var(sandwiches + 4) # different! 0 is no longer meaningful! #> [1] 0.6290784 }\if{html}{\out{
}} diff --git a/man/convert_na_to.Rd b/man/convert_na_to.Rd index 91121ff94..702e0eb2e 100644 --- a/man/convert_na_to.Rd +++ b/man/convert_na_to.Rd @@ -41,8 +41,10 @@ replace \code{NA}.} tasks. Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/convert_to_na.Rd b/man/convert_to_na.Rd index 2529294b7..fe308d61e 100644 --- a/man/convert_to_na.Rd +++ b/man/convert_to_na.Rd @@ -44,8 +44,10 @@ by \code{NA}, should unused levels be dropped?} tasks. Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. 
\code{1} or \code{c(1, 3, 5)}), diff --git a/man/data_codebook.Rd b/man/data_codebook.Rd index 4c0f935e7..d5a542be4 100644 --- a/man/data_codebook.Rd +++ b/man/data_codebook.Rd @@ -34,8 +34,10 @@ data_codebook( tasks. Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), @@ -125,7 +127,8 @@ labels, values or value range, frequencies, amount of missing values). \note{ There are methods to \code{print()} the data frame in a nicer output, as well methods for printing in markdown or HTML format (\code{print_md()} and -\code{print_html()}). +\code{print_html()}). The \code{print()} method for text outputs passes arguments in +\code{...} to \code{\link[insight:export_table]{insight::export_table()}}. } \examples{ data(iris) diff --git a/man/data_duplicated.Rd b/man/data_duplicated.Rd index 73c3e8de1..88624c8c8 100644 --- a/man/data_duplicated.Rd +++ b/man/data_duplicated.Rd @@ -20,8 +20,10 @@ data_duplicated( tasks. 
Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/data_extract.Rd b/man/data_extract.Rd index a0cd4e402..0b544e710 100644 --- a/man/data_extract.Rd +++ b/man/data_extract.Rd @@ -27,8 +27,10 @@ and data frame extensions (e.g., tibbles).} tasks. Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/data_group.Rd b/man/data_group.Rd index 56f5f314e..9cb55de5d 100644 --- a/man/data_group.Rd +++ b/man/data_group.Rd @@ -24,8 +24,10 @@ data_ungroup(data, verbose = TRUE, ...) tasks. 
Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/data_match.Rd b/man/data_match.Rd index a57c34768..a209170ab 100644 --- a/man/data_match.Rd +++ b/man/data_match.Rd @@ -5,7 +5,15 @@ \alias{data_filter} \title{Return filtered or sliced data frame, or row indices} \usage{ -data_match(x, to, match = "and", return_indices = FALSE, drop_na = TRUE, ...) +data_match( + x, + to, + match = "and", + return_indices = FALSE, + remove_na = TRUE, + drop_na, + ... +) data_filter(x, ...) } @@ -24,12 +32,14 @@ or \code{"not"} (or \code{"!"}).} can be used to filter the original data frame. If \code{FALSE} (default), returns directly the filtered data frame instead of the row indices.} -\item{drop_na}{Logical, if \code{TRUE}, missing values (\code{NA}s) are removed before +\item{remove_na}{Logical, if \code{TRUE}, missing values (\code{NA}s) are removed before filtering the data. This is the default behaviour, however, sometimes when row indices are requested (i.e. 
\code{return_indices=TRUE}), it might be useful to preserve \code{NA} values, so returned row indices match the row indices of the original data frame.} +\item{drop_na}{Deprecated, please use \code{remove_na} instead.} + \item{...}{A sequence of logical expressions indicating which rows to keep, or a numeric vector indicating the row indices of rows to keep. Can also be a string representation of a logical expression (e.g. \code{"x > 4"}), a diff --git a/man/data_modify.Rd b/man/data_modify.Rd index 042962e03..28533ecea 100644 --- a/man/data_modify.Rd +++ b/man/data_modify.Rd @@ -30,6 +30,9 @@ type of expression cannot be mixed with other expressions, i.e. if a character vector is provided, you may not add further elements to \code{...}. \item Using \code{NULL} as right-hand side removes a variable from the data frame. Example: \code{Petal.Width = NULL}. +\item For data frames (including grouped ones), the function \code{n()} can be used to count the +number of observations and thereby, for instance, create index values by +using \code{id = 1:n()} or \code{id = 3:(n()+2)} and similar. } Note that newly created variables can be used in subsequent expressions, @@ -109,7 +112,8 @@ new_efc <- data_modify( grouped_efc, c12hour_c = center(c12hour), c12hour_z = c12hour_c / sd(c12hour, na.rm = TRUE), - c12hour_z2 = standardize(c12hour) + c12hour_z2 = standardize(c12hour), + id = 1:n() ) head(new_efc) diff --git a/man/data_partition.Rd b/man/data_partition.Rd index 68ac05a19..1150b4f28 100644 --- a/man/data_partition.Rd +++ b/man/data_partition.Rd @@ -11,7 +11,6 @@ data_partition( seed = NULL, row_id = ".row_id", verbose = TRUE, - group = NULL, ... ) } @@ -33,8 +32,6 @@ contains the row-id's.} \item{verbose}{Toggle messages and warnings.} -\item{group}{Deprecated. 
Use \code{by} instead.} - \item{...}{Other arguments passed to or from other functions.} } \value{ diff --git a/man/data_peek.Rd b/man/data_peek.Rd index 4f3f88e8a..9524c70ec 100644 --- a/man/data_peek.Rd +++ b/man/data_peek.Rd @@ -27,8 +27,10 @@ data_peek(x, ...) tasks. Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/data_read.Rd b/man/data_read.Rd index 1ae3cea8a..d7d26255b 100644 --- a/man/data_read.Rd +++ b/man/data_read.Rd @@ -33,15 +33,16 @@ for SAS data files.} \item{encoding}{The character encoding used for the file. Usually not needed.} \item{convert_factors}{If \code{TRUE} (default), numeric variables, where all -values have a value label, are assumed to be categorical and converted -into factors. If \code{FALSE}, no variable types are guessed and no conversion -of numeric variables into factors will be performed. See also section -'Differences to other packages'. For \code{data_write()}, this argument only -applies to the text (e.g. \code{.txt} or \code{.csv}) or spreadsheet file formats (like -\code{.xlsx}). 
Converting to factors might be useful for these formats because -labelled numeric variables are then converted into factors and exported as -character columns - else, value labels would be lost and only numeric values -are written to the file.} +values have a value label, are assumed to be categorical and converted into +factors. If \code{FALSE}, no variable types are guessed and no conversion of +numeric variables into factors will be performed. For \code{data_read()}, this +argument only applies to file types with \emph{labelled data}, e.g. files from +SPSS, SAS or Stata. See also section 'Differences to other packages'. For +\code{data_write()}, this argument only applies to the text (e.g. \code{.txt} or +\code{.csv}) or spreadsheet file formats (like \code{.xlsx}). Converting to factors +might be useful for these formats because labelled numeric variables are then +converted into factors and exported as character columns - else, value labels +would be lost and only numeric values are written to the file.} \item{verbose}{Toggle warnings and messages.} @@ -118,12 +119,13 @@ versions, use \code{compress = "none"}, for example \code{data_read()} is most comparable to \code{rio::import()}. For data files from SPSS, SAS or Stata, which support labelled data, variables are converted into -their most appropriate type. The major difference to \code{rio::import()} is that -\code{data_read()} automatically converts fully labelled numeric variables into -factors, where imported value labels will be set as factor levels. If a -numeric variable has \emph{no} value labels or less value labels than values, it -is not converted to factor. In this case, value labels are preserved as -\code{"labels"} attribute. Character vectors are preserved. Use +their most appropriate type. The major difference to \code{rio::import()} is for +data files from SPSS, SAS, or Stata, i.e. file types that support +\emph{labelled data}. 
\code{data_read()} automatically converts fully labelled numeric +variables into factors, where imported value labels will be set as factor +levels. If a numeric variable has \emph{no} value labels or less value labels than +values, it is not converted to factor. In this case, value labels are +preserved as \code{"labels"} attribute. Character vectors are preserved. Use \code{convert_factors = FALSE} to remove the automatic conversion of numeric variables to factors. } diff --git a/man/data_relocate.Rd b/man/data_relocate.Rd index 30e4dbbfe..9949b5d27 100644 --- a/man/data_relocate.Rd +++ b/man/data_relocate.Rd @@ -44,8 +44,10 @@ data_remove( tasks. Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/data_rename.Rd b/man/data_rename.Rd index f1f4de938..2ff779c21 100644 --- a/man/data_rename.Rd +++ b/man/data_rename.Rd @@ -46,14 +46,20 @@ data_rename_rows(data, rows = NULL) \item{pattern}{Character vector. For \code{data_rename()}, indicates columns that should be selected for renaming. Can be \code{NULL} (in which case all columns are selected). For \code{data_addprefix()} or \code{data_addsuffix()}, a character -string, which will be added as prefix or suffix to the column names.} +string, which will be added as prefix or suffix to the column names. 
For +\code{data_rename()}, \code{pattern} can also be a named vector. In this case, names +are used as values for the \code{replacement} argument (i.e. \code{pattern} can be a +character vector using \verb{<new name> = "<old name>"} and argument \code{replacement} +will be ignored then).} \item{select}{Variables that will be included when performing the required tasks. Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), @@ -104,7 +110,7 @@ functions (see 'Details'), this argument may be used as workaround.} \item{replacement}{Character vector. Indicates the new name of the columns selected in \code{pattern}. Can be \code{NULL} (in which case column are numbered in sequential order). If not \code{NULL}, \code{pattern} and \code{replacement} must be -of the same length.
If \code{pattern} is a named vector, \code{replacement} is ignored.} \item{safe}{Do not throw error if for instance the variable to be renamed/removed doesn't exist.} @@ -134,12 +140,14 @@ head(data_rename(iris, "Sepal.Length", "length")) head(data_rename(iris, "FakeCol", "length")) # This doesn't head(data_rename(iris, c("Sepal.Length", "Sepal.Width"), c("length", "width"))) +# use named vector to rename +head(data_rename(iris, c(length = "Sepal.Length", width = "Sepal.Width"))) + # Reset names head(data_rename(iris, NULL)) # Change all head(data_rename(iris, replacement = paste0("Var", 1:5))) - } \seealso{ \itemize{ diff --git a/man/data_replicate.Rd b/man/data_replicate.Rd index 35448155d..5a427d570 100644 --- a/man/data_replicate.Rd +++ b/man/data_replicate.Rd @@ -27,8 +27,10 @@ column. Note that the variable indicated by \code{expand} must be an integer vec tasks. Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/data_separate.Rd b/man/data_separate.Rd index 37528d46e..7c951f81c 100644 --- a/man/data_separate.Rd +++ b/man/data_separate.Rd @@ -30,8 +30,10 @@ data_separate( tasks. 
Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/data_summary.Rd b/man/data_summary.Rd index ccbf4c524..24cfa1a9f 100644 --- a/man/data_summary.Rd +++ b/man/data_summary.Rd @@ -7,7 +7,7 @@ \usage{ data_summary(x, ...) -\method{data_summary}{data.frame}(x, ..., by = NULL, include_na = TRUE) +\method{data_summary}{data.frame}(x, ..., by = NULL, remove_na = FALSE) } \arguments{ \item{x}{A (grouped) data frame.} @@ -22,9 +22,9 @@ summary function \code{n()} can be used to count the number of observations.} If supplied, the data will be split by this variable and summary statistics will be computed for each group.} -\item{include_na}{Logical. If \code{TRUE}, missing values are included as a level -in the grouping variable. If \code{FALSE}, missing values are omitted from the -grouping variable.} +\item{remove_na}{Logical. If \code{TRUE}, missing values are omitted from the +grouping variable. If \code{FALSE} (default), missing values are included as a +level in the grouping variable.} } \value{ A data frame with the requested summary statistics. 
diff --git a/man/data_tabulate.Rd b/man/data_tabulate.Rd index b744c1f1b..b28a26ede 100644 --- a/man/data_tabulate.Rd +++ b/man/data_tabulate.Rd @@ -4,6 +4,7 @@ \alias{data_tabulate} \alias{data_tabulate.default} \alias{data_tabulate.data.frame} +\alias{as.data.frame.datawizard_tables} \title{Create frequency and crosstables of variables} \usage{ data_tabulate(x, ...) @@ -13,7 +14,7 @@ data_tabulate(x, ...) by = NULL, drop_levels = FALSE, weights = NULL, - include_na = TRUE, + remove_na = FALSE, proportions = NULL, name = NULL, verbose = TRUE, @@ -29,12 +30,21 @@ data_tabulate(x, ...) by = NULL, drop_levels = FALSE, weights = NULL, - include_na = TRUE, + remove_na = FALSE, proportions = NULL, collapse = FALSE, verbose = TRUE, ... ) + +\method{as.data.frame}{datawizard_tables}( + x, + row.names = NULL, + optional = FALSE, + ..., + stringsAsFactors = FALSE, + add_total = FALSE +) } \arguments{ \item{x}{A (grouped) data frame, a vector or factor.} @@ -52,7 +62,7 @@ factor levels are dropped from the frequency table.} \item{weights}{Optional numeric vector of weights. Must be of the same length as \code{x}. If \code{weights} is supplied, weighted frequencies are calculated.} -\item{include_na}{Logical, if \code{TRUE}, missing values are included in the +\item{remove_na}{Logical, if \code{FALSE}, missing values are included in the frequency or crosstable, else missing values are omitted.} \item{proportions}{Optional character string, indicating the type of @@ -69,8 +79,10 @@ for printing.} tasks. 
Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), @@ -116,6 +128,24 @@ functions (see 'Details'), this argument may be used as workaround.} \item{collapse}{Logical, if \code{TRUE} collapses multiple tables into one larger table for printing. This affects only printing, not the returned object.} + +\item{row.names}{\code{NULL} or a character vector giving the row + names for the data frame. Missing values are not allowed.} + +\item{optional}{logical. If \code{TRUE}, setting row names and + converting column names (to syntactic names: see + \code{\link[base]{make.names}}) is optional. Note that all of \R's + \pkg{base} package \code{as.data.frame()} methods use + \code{optional} only for column names treatment, basically with the + meaning of \code{\link[base]{data.frame}(*, check.names = !optional)}. + See also the \code{make.names} argument of the \code{matrix} method.} + +\item{stringsAsFactors}{logical: should the character vector be converted + to a factor?} + +\item{add_total}{For crosstables (i.e. when \code{by} is not \code{NULL}), a row and +column with the total N values are added to the data frame. 
\code{add_total} has +no effect in \code{as.data.frame()} for simple frequency tables.} } \value{ A data frame, or a list of data frames, with one frequency table @@ -127,10 +157,18 @@ including the number of levels/values as well as the distribution of raw, valid and cumulative percentages. For crosstables, row, column and cell percentages can be calculated. } +\details{ +There is an \code{as.data.frame()} method, to return the frequency tables as a +data frame. The structure of the returned object is a nested data frame, +where the first column contains name of the variable for which frequencies +were calculated, and the second column is a list column that contains the +frequency tables as data frame. See 'Examples'. +} \note{ There are \code{print_html()} and \code{print_md()} methods available for printing frequency or crosstables in HTML and markdown format, e.g. -\code{print_html(data_tabulate(x))}. +\code{print_html(data_tabulate(x))}. The \code{print()} method for text outputs passes +arguments in \code{...} to \code{\link[insight:export_table]{insight::export_table()}}. } \section{Crosstables}{ @@ -138,7 +176,7 @@ If \code{by} is supplied, a crosstable is created. The crosstable includes \verb{<NA>} (missing) values by default. The first column indicates values of \code{x}, the first row indicates values of \code{by} (including missing values). The last row and column contain the total frequencies for each row and column, respectively. -Setting \code{include_na = FALSE} will omit missing values from the crosstable. +Setting \code{remove_na = TRUE} will omit missing values from the crosstable. Setting \code{proportions} to \code{"row"} or \code{"column"} will add row or column percentages. Setting \code{proportions} to \code{"full"} will add relative frequencies for the full table.
@@ -154,7 +192,7 @@ data(efc) data_tabulate(efc$c172code) # drop missing values -data_tabulate(efc$c172code, include_na = FALSE) +data_tabulate(efc$c172code, remove_na = TRUE) # data frame data_tabulate(efc, c("e42dep", "c172code")) @@ -201,11 +239,17 @@ data_tabulate( efc$c172code, by = efc$e16sex, proportions = "column", - include_na = FALSE + remove_na = TRUE ) # round percentages out <- data_tabulate(efc, "c172code", by = "e16sex", proportions = "column") print(out, digits = 0) + +# coerce to data frames +result <- data_tabulate(efc, "c172code", by = "e16sex") +as.data.frame(result) +as.data.frame(result)$table +as.data.frame(result, add_total = TRUE)$table \dontshow{\}) # examplesIf} } diff --git a/man/data_to_long.Rd b/man/data_to_long.Rd index 741725d25..73b54219b 100644 --- a/man/data_to_long.Rd +++ b/man/data_to_long.Rd @@ -45,8 +45,10 @@ rows and fewer columns after the operation.} tasks. Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/data_unique.Rd b/man/data_unique.Rd index 8a45bfc21..a0a70b92a 100644 --- a/man/data_unique.Rd +++ b/man/data_unique.Rd @@ -21,8 +21,10 @@ data_unique( tasks. 
Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/data_unite.Rd b/man/data_unite.Rd index ba7710a8a..369fd33d8 100644 --- a/man/data_unite.Rd +++ b/man/data_unite.Rd @@ -27,8 +27,10 @@ data_unite( tasks. Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. 
\code{1} or \code{c(1, 3, 5)}), diff --git a/man/datawizard-package.Rd b/man/datawizard-package.Rd index db38bc334..d389df6ac 100644 --- a/man/datawizard-package.Rd +++ b/man/datawizard-package.Rd @@ -33,16 +33,16 @@ Useful links: Authors: \itemize{ - \item Indrajeet Patil \email{patilindrajeet.science@gmail.com} (\href{https://orcid.org/0000-0003-1995-6531}{ORCID}) (@patilindrajeets) - \item Dominique Makowski \email{dom.makowski@gmail.com} (\href{https://orcid.org/0000-0001-5375-9967}{ORCID}) (@Dom_Makowski) - \item Daniel Lüdecke \email{d.luedecke@uke.de} (\href{https://orcid.org/0000-0002-8895-3206}{ORCID}) (@strengejacke) + \item Indrajeet Patil \email{patilindrajeet.science@gmail.com} (\href{https://orcid.org/0000-0003-1995-6531}{ORCID}) + \item Dominique Makowski \email{dom.makowski@gmail.com} (\href{https://orcid.org/0000-0001-5375-9967}{ORCID}) + \item Daniel Lüdecke \email{d.luedecke@uke.de} (\href{https://orcid.org/0000-0002-8895-3206}{ORCID}) \item Mattan S. Ben-Shachar \email{matanshm@post.bgu.ac.il} (\href{https://orcid.org/0000-0002-4287-4801}{ORCID}) - \item Brenton M. Wiernik \email{brenton@wiernik.org} (\href{https://orcid.org/0000-0001-9560-6336}{ORCID}) (@bmwiernik) + \item Brenton M. Wiernik \email{brenton@wiernik.org} (\href{https://orcid.org/0000-0001-9560-6336}{ORCID}) } Other contributors: \itemize{ - \item Rémi Thériault \email{remi.theriault@mail.mcgill.ca} (\href{https://orcid.org/0000-0003-4315-6788}{ORCID}) (@rempsyc) [contributor] + \item Rémi Thériault \email{remi.theriault@mail.mcgill.ca} (\href{https://orcid.org/0000-0003-4315-6788}{ORCID}) [contributor] \item Thomas J. 
Faulkenberry \email{faulkenberry@tarleton.edu} [reviewer] \item Robert Garrett \email{rcg4@illinois.edu} [reviewer] } diff --git a/man/demean.Rd b/man/demean.Rd index d03a1010b..fb4db3a29 100644 --- a/man/demean.Rd +++ b/man/demean.Rd @@ -10,35 +10,35 @@ demean( x, select, by, + nested = FALSE, suffix_demean = "_within", suffix_groupmean = "_between", add_attributes = TRUE, - verbose = TRUE, - group = NULL + verbose = TRUE ) degroup( x, select, by, + nested = FALSE, center = "mean", suffix_demean = "_within", suffix_groupmean = "_between", add_attributes = TRUE, - verbose = TRUE, - group = NULL + verbose = TRUE ) detrend( x, select, by, + nested = FALSE, center = "mean", suffix_demean = "_within", suffix_groupmean = "_between", add_attributes = TRUE, - verbose = TRUE, - group = NULL + verbose = TRUE ) } \arguments{ @@ -48,7 +48,28 @@ detrend( that should be group- and de-meaned.} \item{by}{Character vector (or formula) with the name of the variable that -indicates the group- or cluster-ID.} +indicates the group- or cluster-ID. For cross-classified or nested designs, +\code{by} can also identify two or more variables as group- or cluster-IDs. If +the data is nested and should be treated as such, set \code{nested = TRUE}. Else, +if \code{by} defines two or more variables and \code{nested = FALSE}, a cross-classified +design is assumed. Note that \code{demean()} and \code{degroup()} can't handle a mix +of nested and cross-classified designs in one model. + +For nested designs, \code{by} can be: +\itemize{ +\item a character vector with the name of the variable that indicates the +levels, ordered from \emph{highest} level to \emph{lowest} (e.g. +\code{by = c("L4", "L3", "L2")}). +\item a character vector with variable names in the format \code{by = "L4/L3/L2"}, +where the levels are separated by \code{/}. 
+} + +See also section \emph{De-meaning for cross-classified designs} and +\emph{De-meaning for nested designs} below.} + +\item{nested}{Logical, if \code{TRUE}, the data is treated as nested. If \code{FALSE}, +the data is treated as cross-classified. Only applies if \code{by} contains more +than one variable.} \item{suffix_demean, suffix_groupmean}{String value, will be appended to the names of the group-meaned and de-meaned variables of \code{x}. By default, @@ -62,8 +83,6 @@ within- and between-effects are printed in separated blocks.} \item{verbose}{Toggle warnings and messages.} -\item{group}{Deprecated. Use \code{by} instead.} - \item{center}{Method for centering. \code{demean()} always performs mean-centering, while \code{degroup()} can use \code{center = "median"} or \code{center = "mode"} for median- or mode-centering, and also \code{"min"} @@ -72,7 +91,10 @@ or \code{"max"}.} \value{ A data frame with the group-/de-meaned variables, which get the suffix \code{"_between"} (for the group-meaned variable) and \code{"_within"} (for the -de-meaned variable) by default. +de-meaned variable) by default. For cross-classified or nested designs, +the name pattern of the group-meaned variables is the name of the centered +variable followed by the name of the variable that indicates the related +grouping level, e.g. \code{predictor_L3_between} and \code{predictor_L2_between}. } \description{ \code{demean()} computes group- and de-meaned versions of a variable that can be @@ -81,46 +103,50 @@ used in regression analysis to model the between- and within-subject effect. \code{demean()} always uses mean-centering, \code{degroup()} can also use the mode or median for centering. } -\details{ -\subsection{Heterogeneity Bias}{ +\section{Heterogeneity Bias}{ + + Mixed models include different levels of sources of variability, i.e. error terms at each level. 
When macro-indicators (or level-2 predictors, or higher-level units, or more general: \emph{group-level predictors that \strong{vary} within and across groups}) are included as fixed effects (i.e. treated as covariate at level-1), the variance that is left unaccounted for this covariate will be absorbed into the error terms of level-1 and level-2 -(\cite{Bafumi and Gelman 2006; Gelman and Hill 2007, Chapter 12.6.}): -\dQuote{Such covariates contain two parts: one that is specific to the -higher-level entity that does not vary between occasions, and one that -represents the difference between occasions, within higher-level entities} -(\cite{Bell et al. 2015}). Hence, the error terms will be correlated with -the covariate, which violates one of the assumptions of mixed models -(iid, independent and identically distributed error terms). This bias is -also called the \emph{heterogeneity bias} (\cite{Bell et al. 2015}). To -resolve this problem, level-2 predictors used as (level-1) covariates should -be separated into their "within" and "between" effects by "de-meaning" and -"group-meaning": After demeaning time-varying predictors, \dQuote{at the -higher level, the mean term is no longer constrained by Level 1 effects, -so it is free to account for all the higher-level variance associated -with that variable} (\cite{Bell et al. 2015}). +(\emph{Bafumi and Gelman 2006; Gelman and Hill 2007, Chapter 12.6.}): +"Such covariates contain two parts: one that is specific to the higher-level +entity that does not vary between occasions, and one that represents the +difference between occasions, within higher-level entities" (\emph{Bell et al. 2015}). +Hence, the error terms will be correlated with the covariate, which violates +one of the assumptions of mixed models (iid, independent and identically +distributed error terms). This bias is also called the \emph{heterogeneity bias} +(\emph{Bell et al. 2015}). 
To resolve this problem, level-2 predictors used as +(level-1) covariates should be separated into their "within" and "between" +effects by "de-meaning" and "group-meaning": After demeaning time-varying +predictors, "at the higher level, the mean term is no longer constrained by +Level 1 effects, so it is free to account for all the higher-level variance +associated with that variable" (\emph{Bell et al. 2015}). } -\subsection{Panel data and correlating fixed and group effects}{ -\code{demean()} is intended to create group- and de-meaned variables -for panel regression models (fixed effects models), or for complex -random-effect-within-between models (see \cite{Bell et al. 2015, 2018}), -where group-effects (random effects) and fixed effects correlate (see -\cite{Bafumi and Gelman 2006}). This can happen, for instance, when -analyzing panel data, which can lead to \emph{Heterogeneity Bias}. To -control for correlating predictors and group effects, it is recommended -to include the group-meaned and de-meaned version of \emph{time-varying covariates} -(and group-meaned version of \emph{time-invariant covariates} that are on -a higher level, e.g. level-2 predictors) in the model. By this, one can -fit complex multilevel models for panel data, including time-varying -predictors, time-invariant predictors and random effects. +\section{Panel data and correlating fixed and group effects}{ + + +\code{demean()} is intended to create group- and de-meaned variables for panel +regression models (fixed effects models), or for complex +random-effect-within-between models (see \emph{Bell et al. 2015, 2018}), where +group-effects (random effects) and fixed effects correlate (see +\emph{Bafumi and Gelman 2006}). This can happen, for instance, when analyzing +panel data, which can lead to \emph{Heterogeneity Bias}. 
To control for correlating +predictors and group effects, it is recommended to include the group-meaned +and de-meaned version of \emph{time-varying covariates} (and group-meaned version +of \emph{time-invariant covariates} that are on a higher level, e.g. level-2 +predictors) in the model. By this, one can fit complex multilevel models for +panel data, including time-varying predictors, time-invariant predictors and +random effects. } -\subsection{Why mixed models are preferred over fixed effects models}{ +\section{Why mixed models are preferred over fixed effects models}{ + + A mixed models approach can model the causes of endogeneity explicitly by including the (separated) within- and between-effects of time-varying fixed effects and including time-constant fixed effects. Furthermore, @@ -128,24 +154,28 @@ mixed models also include random effects, thus a mixed models approach is superior to classic fixed-effects models, which lack information of variation in the group-effects or between-subject effects. Furthermore, fixed effects regression cannot include random slopes, which means that -fixed effects regressions are neglecting \dQuote{cross-cluster differences -in the effects of lower-level controls (which) reduces the precision of -estimated context effects, resulting in unnecessarily wide confidence -intervals and low statistical power} (\cite{Heisig et al. 2017}). +fixed effects regressions are neglecting "cross-cluster differences in the +effects of lower-level controls (which) reduces the precision of estimated +context effects, resulting in unnecessarily wide confidence intervals and +low statistical power" (\emph{Heisig et al. 2017}). } -\subsection{Terminology}{ +\section{Terminology}{ + + The group-meaned variable is simply the mean of an independent variable -within each group (or id-level or cluster) represented by \code{by}. -It represents the cluster-mean of an independent variable. 
The regression -coefficient of a group-meaned variable is the \emph{between-subject-effect}. -The de-meaned variable is then the centered version of the group-meaned -variable. De-meaning is sometimes also called person-mean centering or -centering within clusters. The regression coefficient of a de-meaned -variable represents the \emph{within-subject-effect}. +within each group (or id-level or cluster) represented by \code{by}. It represents +the cluster-mean of an independent variable. The regression coefficient of a +group-meaned variable is the \emph{between-subject-effect}. The de-meaned variable +is then the centered version of the group-meaned variable. De-meaning is +sometimes also called person-mean centering or centering within clusters. +The regression coefficient of a de-meaned variable represents the +\emph{within-subject-effect}. } -\subsection{De-meaning with continuous predictors}{ +\section{De-meaning with continuous predictors}{ + + For continuous time-varying predictors, the recommendation is to include both their de-meaned and group-meaned versions as fixed effects, but not the raw (untransformed) time-varying predictors themselves. The de-meaned @@ -155,7 +185,9 @@ the within-subject effect, while the coefficient of the group-meaned predictor indicates the between-subject effect. } -\subsection{De-meaning with binary predictors}{ +\section{De-meaning with binary predictors}{ + + For binary time-varying predictors, there are two recommendations. First is to include the raw (untransformed) binary predictor as fixed effect only and the \emph{de-meaned} variable as random effect (random slope). @@ -163,51 +195,91 @@ The alternative would be to add the de-meaned version(s) of binary time-varying covariates as additional fixed effect as well (instead of adding it as random slope). Centering time-varying binary variables to obtain within-effects (level 1) isn't necessary. 
They have a sensible -interpretation when left in the typical 0/1 format (\cite{Hoffmann 2015, +interpretation when left in the typical 0/1 format (\emph{Hoffmann 2015, chapter 8-2.I}). \code{demean()} will thus coerce categorical time-varying predictors to numeric to compute the de- and group-meaned versions for these variables, where the raw (untransformed) binary predictor and the de-meaned version should be added to the model. } -\subsection{De-meaning of factors with more than 2 levels}{ +\section{De-meaning of factors with more than 2 levels}{ + + Factors with more than two levels are demeaned in two ways: first, these are also converted to numeric and de-meaned; second, dummy variables are created (binary, with 0/1 coding for each level) and these binary dummy-variables are de-meaned in the same way (as described above). -Packages like \pkg{panelr} internally convert factors to dummies before +Packages like \strong{panelr} internally convert factors to dummies before demeaning, so this behaviour can be mimicked here. } -\subsection{De-meaning interaction terms}{ There are multiple ways to deal -with interaction terms of within- and between-effects. A classical approach -is to simply use the product term of the de-meaned variables (i.e. -introducing the de-meaned variables as interaction term in the model -formula, e.g. \code{y ~ x_within * time_within}). This approach, however, -might be subject to bias (see \cite{Giesselmann & Schmidt-Catran 2020}). -\cr \cr -Another option is to first calculate the product term and then apply the -de-meaning to it. This approach produces an estimator \dQuote{that reflects +\section{De-meaning interaction terms}{ + + +There are multiple ways to deal with interaction terms of within- and +between-effects. +\itemize{ +\item A classical approach is to simply use the product term of the de-meaned +variables (i.e. introducing the de-meaned variables as interaction term +in the model formula, e.g. 
\code{y ~ x_within * time_within}). This approach, +however, might be subject to bias (see \emph{Giesselmann & Schmidt-Catran 2020}). +\item Another option is to first calculate the product term and then apply the +de-meaning to it. This approach produces an estimator "that reflects unit-level differences of interacted variables whose moderators vary -within units}, which is desirable if \emph{no} within interaction of -two time-dependent variables is required. \cr \cr -A third option, when the interaction should result in a genuine within +within units", which is desirable if \emph{no} within interaction of +two time-dependent variables is required. This is what \code{demean()} does +internally when \code{select} contains interaction terms. +\item A third option, when the interaction should result in a genuine within estimator, is to "double de-mean" the interaction terms -(\cite{Giesselmann & Schmidt-Catran 2018}), however, this is currently +(\emph{Giesselmann & Schmidt-Catran 2018}), however, this is currently not supported by \code{demean()}. If this is required, the \code{wmb()} -function from the \pkg{panelr} package should be used. \cr \cr +function from the \strong{panelr} package should be used. +} + To de-mean interaction terms for within-between models, simply specify -the term as interaction for the \code{select}-argument, e.g. -\code{select = "a*b"} (see 'Examples'). +the term as interaction for the \code{select}-argument, e.g. \code{select = "a*b"} +(see 'Examples'). } -\subsection{Analysing panel data with mixed models using lme4}{ -A description of how to translate the -formulas described in \emph{Bell et al. 2018} into R using \code{lmer()} -from \pkg{lme4} can be found in -\href{https://easystats.github.io/parameters/articles/demean.html}{this vignette}. +\section{De-meaning for cross-classified designs}{ + + +\code{demean()} can handle cross-classified designs, where the data has two or +more groups at the higher (i.e. second) level. 
In such cases, the +\code{by}-argument can identify two or more variables that represent the +cross-classified group- or cluster-IDs. The de-meaned variables for +cross-classified designs are created by simply subtracting all group means from each +individual value, i.e. \emph{fully cluster-mean-centering} (see \emph{Guo et al. 2024} +for details). Note that de-meaning for cross-classified designs is \emph{not} +equivalent to de-meaning of nested data structures from models with three or +more levels. Set \code{nested = TRUE} to explicitly assume a nested design. For +cross-classified designs, de-meaning is supposed to work for models like +\code{y ~ x + (1|level3) + (1|level2)}, but \emph{not} for models like +\code{y ~ x + (1|level3/level2)}. Note that \code{demean()} and \code{degroup()} can't +handle a mix of nested and cross-classified designs in one model. } + +\section{De-meaning for nested designs}{ + + +\emph{Brincks et al. (2017)} have suggested an algorithm to center variables for +nested designs, which is implemented in \code{demean()}. For nested designs, set +\code{nested = TRUE} \emph{and} specify the variables that indicate the different +levels in descending order in the \code{by} argument. E.g., +\verb{by = c("level4", "level3", "level2")} assumes a model like +\code{y ~ x + (1|level4/level3/level2)}. An alternative notation for the +\code{by}-argument would be \code{by = "level4/level3/level2"}, similar to the +formula notation. } + +\section{Analysing panel data with mixed models using lme4}{ + + +A description of how to translate the formulas described in \emph{Bell et al. 2018} +into R using \code{lmer()} from \strong{lme4} can be found in +\href{https://easystats.github.io/parameters/articles/demean.html}{this vignette}. +} + \examples{ data(iris) @@ -244,12 +316,19 @@ Models: Making an Informed Choice. Quality & Quantity (53); 1051-1074 \item Bell A, Jones K. 2015. 
Explaining Fixed Effects: Random Effects Modeling of Time-Series Cross-Sectional and Panel Data. Political Science Research and Methods, 3(1), 133–153. +\item Brincks, A. M., Enders, C. K., Llabre, M. M., Bulotsky-Shearer, R. J., +Prado, G., and Feaster, D. J. (2017). Centering Predictor Variables in +Three-Level Contextual Models. Multivariate Behavioral Research, 52(2), +149–163. https://doi.org/10.1080/00273171.2016.1256753 \item Gelman A, Hill J. 2007. Data Analysis Using Regression and Multilevel/Hierarchical Models. Analytical Methods for Social Research. Cambridge, New York: Cambridge University Press \item Giesselmann M, Schmidt-Catran, AW. 2020. Interactions in fixed effects regression models. Sociological Methods & Research, 1–28. https://doi.org/10.1177/0049124120914934 +\item Guo Y, Dhaliwal J, Rights JD. 2024. Disaggregating level-specific effects +in cross-classified multilevel models. Behavior Research Methods, 56(4), +3023–3057. \item Heisig JP, Schaeffer M, Giesecke J. 2017. The Costs of Simplicity: Why Multilevel Models May Benefit from Accounting for Cross-Cluster Differences in the Effects of Controls. American Sociological Review 82 diff --git a/man/describe_distribution.Rd b/man/describe_distribution.Rd index 369bd9ef6..80b69e115 100644 --- a/man/describe_distribution.Rd +++ b/man/describe_distribution.Rd @@ -86,8 +86,10 @@ vector before the mean is computed.} tasks. 
Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/extract_column_names.Rd b/man/extract_column_names.Rd index 6805d9569..3ea5da7dc 100644 --- a/man/extract_column_names.Rd +++ b/man/extract_column_names.Rd @@ -2,9 +2,7 @@ % Please edit documentation in R/data_select.R, R/extract_column_names.R \name{data_select} \alias{data_select} -\alias{get_columns} \alias{extract_column_names} -\alias{data_find} \alias{find_columns} \title{Find or get columns in a data frame based on search patterns} \usage{ @@ -18,16 +16,6 @@ data_select( ... ) -get_columns( - data, - select = NULL, - exclude = NULL, - ignore_case = FALSE, - regex = FALSE, - verbose = TRUE, - ... -) - extract_column_names( data, select = NULL, @@ -38,16 +26,6 @@ extract_column_names( ... ) -data_find( - data, - select = NULL, - exclude = NULL, - ignore_case = FALSE, - regex = FALSE, - verbose = TRUE, - ... -) - find_columns( data, select = NULL, @@ -65,8 +43,10 @@ find_columns( tasks. 
Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), @@ -174,7 +154,7 @@ outer(iris, starts_with("Sep")) }\if{html}{\out{}} } \examples{ -# Find columns names by pattern +# Find column names by pattern extract_column_names(iris, starts_with("Sepal")) extract_column_names(iris, ends_with("Width")) extract_column_names(iris, regex("\\\\.")) @@ -187,6 +167,9 @@ extract_column_names(iris, starts_with("Sepal"), exclude = contains("Width")) numeric_mean_35 <- function(x) is.numeric(x) && mean(x, na.rm = TRUE) > 3.5 extract_column_names(iris, numeric_mean_35) +# find a range of column names, using a character vector +extract_column_names(mtcars, c("cyl:hp", "wt")) + # rename returned columns for "data_select()" head(data_select(mtcars, c(`Miles per Gallon` = "mpg", Cylinders = "cyl"))) } diff --git a/man/labels_to_levels.Rd b/man/labels_to_levels.Rd index 8024eb2d3..163eb0eaa 100644 --- a/man/labels_to_levels.Rd +++ b/man/labels_to_levels.Rd @@ -33,8 +33,10 @@ allowed.} tasks. 
Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/mean_sd.Rd b/man/mean_sd.Rd index f0ea239f8..33eeb4bc5 100644 --- a/man/mean_sd.Rd +++ b/man/mean_sd.Rd @@ -5,7 +5,7 @@ \alias{median_mad} \title{Summary Helpers} \usage{ -mean_sd(x, times = 1L, remove_na = TRUE, named = TRUE, na.rm = TRUE, ...) +mean_sd(x, times = 1L, remove_na = TRUE, named = TRUE, ...) median_mad( x, @@ -13,7 +13,6 @@ median_mad( remove_na = TRUE, constant = 1.4826, named = TRUE, - na.rm = TRUE, ... ) } @@ -29,8 +28,6 @@ or not (\code{FALSE}, default)?} \item{named}{Should the vector be named? (E.g., \code{c("-SD" = -1, Mean = 1, "+SD" = 2)}.)} -\item{na.rm}{Deprecated. Please use \code{remove_na} instead.} - \item{...}{Not used.} \item{constant}{scale factor.} diff --git a/man/means_by_group.Rd b/man/means_by_group.Rd index d7a6dfc96..6c06ac3b1 100644 --- a/man/means_by_group.Rd +++ b/man/means_by_group.Rd @@ -8,15 +8,7 @@ \usage{ means_by_group(x, ...) -\method{means_by_group}{numeric}( - x, - by = NULL, - ci = 0.95, - weights = NULL, - digits = NULL, - group = NULL, - ... -) +\method{means_by_group}{numeric}(x, by = NULL, ci = 0.95, weights = NULL, digits = NULL, ...) \method{means_by_group}{data.frame}( x, @@ -29,7 +21,6 @@ means_by_group(x, ...) ignore_case = FALSE, regex = FALSE, verbose = TRUE, - group = NULL, ... 
) } @@ -56,14 +47,14 @@ weights are used.} \item{digits}{Optional scalar, indicating the amount of digits after decimal point when rounding estimates and values.} -\item{group}{Deprecated. Use \code{by} instead.} - \item{select}{Variables that will be included when performing the required tasks. Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/normalize.Rd b/man/normalize.Rd index 4a9a61a68..c325e98fe 100644 --- a/man/normalize.Rd +++ b/man/normalize.Rd @@ -71,8 +71,10 @@ the normalized vectors are rescaled to a range from \code{0 + include_bounds} to tasks. Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. 
\code{1} or \code{c(1, 3, 5)}), diff --git a/man/ranktransform.Rd b/man/ranktransform.Rd index c23105735..7046db2b5 100644 --- a/man/ranktransform.Rd +++ b/man/ranktransform.Rd @@ -39,8 +39,10 @@ details.} tasks. Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/recode_values.Rd b/man/recode_values.Rd index 9810c0a2d..dece902f7 100644 --- a/man/recode_values.Rd +++ b/man/recode_values.Rd @@ -4,7 +4,6 @@ \alias{recode_values} \alias{recode_values.numeric} \alias{recode_values.data.frame} -\alias{change_code} \title{Recode old values of variables into new values} \usage{ recode_values(x, ...) @@ -31,20 +30,6 @@ recode_values(x, ...) verbose = TRUE, ... ) - -change_code( - x, - select = NULL, - exclude = NULL, - recode = NULL, - default = NULL, - preserve_na = TRUE, - append = FALSE, - ignore_case = FALSE, - regex = FALSE, - verbose = TRUE, - ... -) } \arguments{ \item{x}{A data frame, numeric or character vector, or factor.} @@ -75,8 +60,10 @@ default value.} tasks. 
Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/rescale.Rd b/man/rescale.Rd index 016a6f841..490964777 100644 --- a/man/rescale.Rd +++ b/man/rescale.Rd @@ -67,8 +67,10 @@ the input vector (\code{range(x)}).} tasks. Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. 
\code{1} or \code{c(1, 3, 5)}), diff --git a/man/rescale_weights.Rd b/man/rescale_weights.Rd index 4a67d4100..d9651decb 100644 --- a/man/rescale_weights.Rd +++ b/man/rescale_weights.Rd @@ -4,7 +4,7 @@ \alias{rescale_weights} \title{Rescale design weights for multilevel analysis} \usage{ -rescale_weights(data, by, probability_weights, nest = FALSE, group = NULL) +rescale_weights(data, by, probability_weights, nest = FALSE) } \arguments{ \item{data}{A data frame.} @@ -21,8 +21,6 @@ sampling) weights of the survey data (level-1-weight).} \item{nest}{Logical, if \code{TRUE} and \code{by} indicates at least two group variables, then groups are "nested", i.e. groups are now a combination from each group level of the variables in \code{by}.} - -\item{group}{Deprecated. Use \code{by} instead.} } \value{ \code{data}, including the new weighting variables: \code{pweights_a} diff --git a/man/reverse.Rd b/man/reverse.Rd index 6304dffc6..5767908ff 100644 --- a/man/reverse.Rd +++ b/man/reverse.Rd @@ -45,8 +45,10 @@ usually only makes sense when factor levels are numeric, not characters.} tasks. Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. 
\code{1} or \code{c(1, 3, 5)}), diff --git a/man/row_count.Rd b/man/row_count.Rd new file mode 100644 index 000000000..7bf54fe5f --- /dev/null +++ b/man/row_count.Rd @@ -0,0 +1,132 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/row_count.R +\name{row_count} +\alias{row_count} +\title{Count specific values row-wise} +\usage{ +row_count( + data, + select = NULL, + exclude = NULL, + count = NULL, + allow_coercion = TRUE, + ignore_case = FALSE, + regex = FALSE, + verbose = TRUE +) +} +\arguments{ +\item{data}{A data frame with at least two columns, where number of specific +values are counted row-wise.} + +\item{select}{Variables that will be included when performing the required +tasks. Can be either +\itemize{ +\item a variable specified as a literal variable name (e.g., \code{column_name}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), +\item a formula with variable names (e.g., \code{~column_1 + column_2}), +\item a vector of positive integers, giving the positions counting from the left +(e.g. \code{1} or \code{c(1, 3, 5)}), +\item a vector of negative integers, giving the positions counting from the +right (e.g., \code{-1} or \code{-1:-3}), +\item one of the following select-helpers: \code{starts_with()}, \code{ends_with()}, +\code{contains()}, a range using \code{:} or \code{regex("")}. \code{starts_with()}, +\code{ends_with()}, and \code{contains()} accept several patterns, e.g +\code{starts_with("Sep", "Petal")}. +\item or a function testing for logical conditions, e.g. 
\code{is.numeric()} (or +\code{is.numeric}), or any user-defined function that selects the variables +for which the function returns \code{TRUE} (like: \code{foo <- function(x) mean(x) > 3}), +\item ranges specified via literal variable names, select-helpers (except +\code{regex()}) and (user-defined) functions can be negated, i.e. return +non-matching elements, when prefixed with a \code{-}, e.g. \code{-ends_with("")}, +\code{-is.numeric} or \code{-(Sepal.Width:Petal.Length)}. \strong{Note:} Negation means +that matches are \emph{excluded}, and thus, the \code{exclude} argument can be +used alternatively. For instance, \code{select=-ends_with("Length")} (with +\code{-}) is equivalent to \code{exclude=ends_with("Length")} (no \code{-}). In case +negation should not work as expected, use the \code{exclude} argument instead. +} + +If \code{NULL}, selects all columns. Patterns that found no matches are silently +ignored, e.g. \code{extract_column_names(iris, select = c("Species", "Test"))} +will just return \code{"Species"}.} + +\item{exclude}{See \code{select}, however, column names matched by the pattern +from \code{exclude} will be excluded instead of selected. If \code{NULL} (the default), +excludes no columns.} + +\item{count}{The value for which the row sum should be computed. May be a +numeric value, a character string (for factors or character vectors), \code{NA} or +\code{Inf}.} + +\item{allow_coercion}{Logical. If \code{FALSE}, \code{count} matches only values of same +class (i.e. when \code{count = 2}, the value \code{"2"} is not counted and vice versa). +By default, when \code{allow_coercion = TRUE}, \code{count = 2} also matches \code{"2"}. In +order to count factor levels in the data, use \code{count = factor("level")}. 
See +'Examples'.} + +\item{ignore_case}{Logical, if \code{TRUE} and when one of the select-helpers or +a regular expression is used in \code{select}, ignores lower/upper case in the +search pattern when matching against variable names.} + +\item{regex}{Logical, if \code{TRUE}, the search pattern from \code{select} will be +treated as regular expression. When \code{regex = TRUE}, select \emph{must} be a +character string (or a variable containing a character string) and is not +allowed to be one of the supported select-helpers or a character vector +of length > 1. \code{regex = TRUE} is comparable to using one of the two +select-helpers, \code{select = contains("")} or \code{select = regex("")}, however, +since the select-helpers may not work when called from inside other +functions (see 'Details'), this argument may be used as workaround.} + +\item{verbose}{Toggle warnings.} +} +\value{ +A vector with row-wise counts of values specified in \code{count}. +} +\description{ +\code{row_count()} mimics base R's \code{rowSums()}, with sums for a +specific value indicated by \code{count}. Hence, it is similar to +\code{rowSums(x == count, na.rm = TRUE)}, but offers some more options, including +strict comparisons. Comparisons using \code{==} coerce values to atomic vectors, +thus both \code{2 == 2} and \code{"2" == 2} are \code{TRUE}. In \code{row_count()}, it is also +possible to make "type safe" comparisons using the \code{allow_coercion} argument, +where \code{"2" == 2} is not true. 
+} +\examples{ +dat <- data.frame( + c1 = c(1, 2, NA, 4), + c2 = c(NA, 2, NA, 5), + c3 = c(NA, 4, NA, NA), + c4 = c(2, 3, 7, 8) +) + +# count all 4s per row +row_count(dat, count = 4) +# count all missing values per row +row_count(dat, count = NA) + +dat <- data.frame( + c1 = c("1", "2", NA, "3"), + c2 = c(NA, "2", NA, "3"), + c3 = c(NA, 4, NA, NA), + c4 = c(2, 3, 7, Inf) +) +# count all 2s and "2"s per row +row_count(dat, count = 2) +# only count 2s, but not "2"s +row_count(dat, count = 2, allow_coercion = FALSE) + +dat <- data.frame( + c1 = factor(c("1", "2", NA, "3")), + c2 = c("2", "1", NA, "3"), + c3 = c(NA, 4, NA, NA), + c4 = c(2, 3, 7, Inf) +) +# find only character "2"s +row_count(dat, count = "2", allow_coercion = FALSE) +# find only factor level "2"s +row_count(dat, count = factor("2"), allow_coercion = FALSE) + +} diff --git a/man/row_means.Rd b/man/row_means.Rd index c347fc6f1..43d85b5b0 100644 --- a/man/row_means.Rd +++ b/man/row_means.Rd @@ -2,7 +2,8 @@ % Please edit documentation in R/row_means.R \name{row_means} \alias{row_means} -\title{Row means (optionally with minimum amount of valid values)} +\alias{row_sums} +\title{Row means or sums (optionally with minimum amount of valid values)} \usage{ row_means( data, @@ -15,16 +16,31 @@ row_means( remove_na = FALSE, verbose = TRUE ) + +row_sums( + data, + select = NULL, + exclude = NULL, + min_valid = NULL, + digits = NULL, + ignore_case = FALSE, + regex = FALSE, + remove_na = FALSE, + verbose = TRUE +) } \arguments{ -\item{data}{A data frame with at least two columns, where row means are applied.} +\item{data}{A data frame with at least two columns, where row means or row +sums are applied.} \item{select}{Variables that will be included when performing the required tasks. 
Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), @@ -58,9 +74,9 @@ excludes no columns.} \item{min_valid}{Optional, a numeric value of length 1. May either be \itemize{ \item a numeric value that indicates the amount of valid values per row to -calculate the row mean; +calculate the row mean or row sum; \item or a value between \code{0} and \code{1}, indicating a proportion of valid values per -row to calculate the row mean (see 'Details'). +row to calculate the row mean or row sum (see 'Details'). \item \code{NULL} (default), in which all cases are considered. } @@ -84,28 +100,31 @@ since the select-helpers may not work when called from inside other functions (see 'Details'), this argument may be used as workaround.} \item{remove_na}{Logical, if \code{TRUE} (default), removes missing (\code{NA}) values -before calculating row means. Only applies if \code{min_valuid} is not specified.} +before calculating row means or row sums. Only applies if \code{min_valid} is not +specified.} \item{verbose}{Toggle warnings.} } \value{ -A vector with row means for those rows with at least \code{n} valid values. +A vector with row means (for \code{row_means()}) or row sums (for +\code{row_sums()}) for those rows with at least \code{n} valid values. 
} \description{ -This function is similar to the SPSS \code{MEAN.n} function and computes -row means from a data frame or matrix if at least \code{min_valid} values of a row are -valid (and not \code{NA}). +This function is similar to the SPSS \code{MEAN.n} or \code{SUM.n} +function and computes row means or row sums from a data frame or matrix if at +least \code{min_valid} values of a row are valid (and not \code{NA}). } \details{ -Rounding to a negative number of \code{digits} means rounding to a power of -ten, for example \code{row_means(df, 3, digits = -2)} rounds to the nearest hundred. -For \code{min_valid}, if not \code{NULL}, \code{min_valid} must be a numeric value from \code{0} -to \code{ncol(data)}. If a row in the data frame has at least \code{min_valid} -non-missing values, the row mean is returned. If \code{min_valid} is a non-integer -value from 0 to 1, \code{min_valid} is considered to indicate the proportion of -required non-missing values per row. E.g., if \code{min_valid = 0.75}, a row must -have at least \code{ncol(data) * min_valid} non-missing values for the row mean -to be calculated. See 'Examples'. +Rounding to a negative number of \code{digits} means rounding to a power +of ten, for example \code{row_means(df, 3, digits = -2)} rounds to the nearest +hundred. For \code{min_valid}, if not \code{NULL}, \code{min_valid} must be a numeric value +from \code{0} to \code{ncol(data)}. If a row in the data frame has at least \code{min_valid} +non-missing values, the row mean or row sum is returned. If \code{min_valid} is a +non-integer value from 0 to 1, \code{min_valid} is considered to indicate the +proportion of required non-missing values per row. E.g., if +\code{min_valid = 0.75}, a row must have at least \code{ncol(data) * min_valid} +non-missing values for the row mean or row sum to be calculated. See +'Examples'. 
} \examples{ dat <- data.frame( @@ -123,6 +142,7 @@ row_means(dat, remove_na = TRUE) # needs at least 4 non-missing values per row row_means(dat, min_valid = 4) # 1 valid return value +row_sums(dat, min_valid = 4) # 1 valid return value # needs at least 3 non-missing values per row row_means(dat, min_valid = 3) # 2 valid return values @@ -135,6 +155,7 @@ row_means(dat, select = c("c1", "c3"), min_valid = 1) # needs at least 50\% of non-missing values per row row_means(dat, min_valid = 0.5) # 3 valid return values +row_sums(dat, min_valid = 0.5) # needs at least 75\% of non-missing values per row row_means(dat, min_valid = 0.75) # 2 valid return values diff --git a/man/skewness.Rd b/man/skewness.Rd index a89d98067..0401e3a40 100644 --- a/man/skewness.Rd +++ b/man/skewness.Rd @@ -19,7 +19,6 @@ skewness(x, ...) type = "2", iterations = NULL, verbose = TRUE, - na.rm = TRUE, ... ) @@ -31,7 +30,6 @@ kurtosis(x, ...) type = "2", iterations = NULL, verbose = TRUE, - na.rm = TRUE, ... ) @@ -61,8 +59,6 @@ errors. If \code{NULL} (default), parametric standard errors are computed.} \item{verbose}{Toggle warnings and messages.} -\item{na.rm}{Deprecated. Please use \code{remove_na} instead.} - \item{digits}{Number of decimal places.} \item{test}{Logical, if \code{TRUE}, tests if skewness or kurtosis is diff --git a/man/slide.Rd b/man/slide.Rd index ccc6bd7e9..c26943116 100644 --- a/man/slide.Rd +++ b/man/slide.Rd @@ -34,8 +34,10 @@ factors or character vectors to numeric values.} tasks. 
Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/standardize.Rd b/man/standardize.Rd index 4041f2dc0..fcc8c6ae7 100644 --- a/man/standardize.Rd +++ b/man/standardize.Rd @@ -145,8 +145,10 @@ vectors as well.} tasks. Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/text_format.Rd b/man/text_format.Rd index 87f045193..14d64b096 100644 --- a/man/text_format.Rd +++ b/man/text_format.Rd @@ -2,7 +2,6 @@ % Please edit documentation in R/text_format.R \name{text_format} \alias{text_format} -\alias{format_text} \alias{text_fullstop} \alias{text_lastchar} \alias{text_concatenate} @@ -20,15 +19,6 @@ text_format( ... 
) -format_text( - text, - sep = ", ", - last = " and ", - width = NULL, - enclose = NULL, - ... -) - text_fullstop(text) text_lastchar(text, n = 1) @@ -63,7 +53,11 @@ text elements will not be enclosed.} \item{pattern}{Character vector. For \code{data_rename()}, indicates columns that should be selected for renaming. Can be \code{NULL} (in which case all columns are selected). For \code{data_addprefix()} or \code{data_addsuffix()}, a character -string, which will be added as prefix or suffix to the column names.} +string, which will be added as prefix or suffix to the column names. For +\code{data_rename()}, \code{pattern} can also be a named vector. In this case, names +are used as values for the \code{replacement} argument (i.e. \code{pattern} can be a +character vector using \verb{ = ""} and argument \code{replacement} +will be ignored then).} } \value{ A character string. diff --git a/man/to_factor.Rd b/man/to_factor.Rd index e035769ec..d544bdaae 100644 --- a/man/to_factor.Rd +++ b/man/to_factor.Rd @@ -36,8 +36,10 @@ the values of \code{x} (i.e. as if using \code{as.factor()}).} tasks. Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), diff --git a/man/to_numeric.Rd b/man/to_numeric.Rd index 7478c9579..39f04c3a9 100644 --- a/man/to_numeric.Rd +++ b/man/to_numeric.Rd @@ -11,7 +11,7 @@ to_numeric(x, ...) 
x, select = NULL, exclude = NULL, - dummy_factors = TRUE, + dummy_factors = FALSE, preserve_levels = FALSE, lowest = NULL, append = FALSE, @@ -30,8 +30,10 @@ to_numeric(x, ...) tasks. Can be either \itemize{ \item a variable specified as a literal variable name (e.g., \code{column_name}), -\item a string with the variable name (e.g., \code{"column_name"}), or a character -vector of variable names (e.g., \code{c("col1", "col2", "col3")}), +\item a string with the variable name (e.g., \code{"column_name"}), a character +vector of variable names (e.g., \code{c("col1", "col2", "col3")}), or a +character vector of variable names including ranges specified via \code{:} +(e.g., \code{c("col1:col3", "col5")}), \item a formula with variable names (e.g., \code{~column_1 + column_2}), \item a vector of positive integers, giving the positions counting from the left (e.g. \code{1} or \code{c(1, 3, 5)}), @@ -107,11 +109,11 @@ either numeric levels or dummy variables. The "counterpart" to convert variables into factors is \code{to_factor()}. } \note{ -By default, \code{to_numeric()} converts factors into "binary" dummies, i.e. +When factors should be converted into multiple "binary" dummies, i.e. each factor level is converted into a separate column filled with a binary -0-1 value. If only one column is required, use \code{dummy_factors = FALSE}. If -you want to preserve the original factor levels (in case these represent -numeric values), use \code{preserve_levels = TRUE}. +0-1 value, set \code{dummy_factors = TRUE}. If you want to preserve the original +factor levels (in case these represent numeric values), use +\code{preserve_levels = TRUE}. } \section{Selection of variables - \code{select} argument}{ @@ -126,12 +128,12 @@ to also include the original variables in the returned data frame. 
\examples{ to_numeric(head(ToothGrowth)) -to_numeric(head(ToothGrowth), dummy_factors = FALSE) +to_numeric(head(ToothGrowth), dummy_factors = TRUE) # factors x <- as.factor(mtcars$gear) -to_numeric(x, dummy_factors = FALSE) -to_numeric(x, dummy_factors = FALSE, preserve_levels = TRUE) +to_numeric(x) +to_numeric(x, preserve_levels = TRUE) # same as: coerce_to_numeric(x) diff --git a/pkgdown/_pkgdown.yaml b/pkgdown/_pkgdown.yaml index d52994e16..31ec901d0 100644 --- a/pkgdown/_pkgdown.yaml +++ b/pkgdown/_pkgdown.yaml @@ -71,6 +71,7 @@ reference: - kurtosis - smoothness - skewness + - row_count - row_means - weighted_mean - mean_sd @@ -125,6 +126,11 @@ reference: - nhanes_sample articles: + - title: Overview of vignettes + navbar: ~ + contents: + - overview_of_vignettes + - title: Data Preparation desc: | Articles explaining utility of 'datawizard' for data wrangling diff --git a/tests/testthat/_snaps/categorize.md b/tests/testthat/_snaps/categorize.md new file mode 100644 index 000000000..9ed3c1115 --- /dev/null +++ b/tests/testthat/_snaps/categorize.md @@ -0,0 +1,46 @@ +# categorize labelling ranged + + Code + categorize(mtcars$mpg, "equal_length", n_groups = 5) + Output + [1] 3 3 3 3 2 2 1 3 3 2 2 2 2 2 1 1 1 5 5 5 3 2 2 1 2 4 4 5 2 2 1 3 + +--- + + Code + categorize(mtcars$mpg, "equal_length", n_groups = 5, labels = "range") + Output + [1] [19.8,24.5) [19.8,24.5) [19.8,24.5) [19.8,24.5) [15.1,19.8) [15.1,19.8) + [7] [10.4,15.1) [19.8,24.5) [19.8,24.5) [15.1,19.8) [15.1,19.8) [15.1,19.8) + [13] [15.1,19.8) [15.1,19.8) [10.4,15.1) [10.4,15.1) [10.4,15.1) [29.2,33.9] + [19] [29.2,33.9] [29.2,33.9] [19.8,24.5) [15.1,19.8) [15.1,19.8) [10.4,15.1) + [25] [15.1,19.8) [24.5,29.2) [24.5,29.2) [29.2,33.9] [15.1,19.8) [15.1,19.8) + [31] [10.4,15.1) [19.8,24.5) + Levels: [10.4,15.1) [15.1,19.8) [19.8,24.5) [24.5,29.2) [29.2,33.9] + +--- + + Code + categorize(mtcars$mpg, "equal_length", n_groups = 5, labels = "observed") + Output + [1] (21-24.4) (21-24.4) (21-24.4) (21-24.4) 
(15.2-19.7) (15.2-19.7) + [7] (10.4-15) (21-24.4) (21-24.4) (15.2-19.7) (15.2-19.7) (15.2-19.7) + [13] (15.2-19.7) (15.2-19.7) (10.4-15) (10.4-15) (10.4-15) (30.4-33.9) + [19] (30.4-33.9) (30.4-33.9) (21-24.4) (15.2-19.7) (15.2-19.7) (10.4-15) + [25] (15.2-19.7) (26-27.3) (26-27.3) (30.4-33.9) (15.2-19.7) (15.2-19.7) + [31] (10.4-15) (21-24.4) + Levels: (10.4-15) (15.2-19.7) (21-24.4) (26-27.3) (30.4-33.9) + +# categorize breaks + + Code + categorize(mtcars$mpg, "equal_length", n_groups = 5, labels = "range", breaks = "inclusive") + Output + [1] (19.8,24.5] (19.8,24.5] (19.8,24.5] (19.8,24.5] (15.1,19.8] (15.1,19.8] + [7] [10.4,15.1] (19.8,24.5] (19.8,24.5] (15.1,19.8] (15.1,19.8] (15.1,19.8] + [13] (15.1,19.8] (15.1,19.8] [10.4,15.1] [10.4,15.1] [10.4,15.1] (29.2,33.9] + [19] (29.2,33.9] (29.2,33.9] (19.8,24.5] (15.1,19.8] (15.1,19.8] [10.4,15.1] + [25] (15.1,19.8] (24.5,29.2] (24.5,29.2] (29.2,33.9] (15.1,19.8] (15.1,19.8] + [31] [10.4,15.1] (19.8,24.5] + Levels: [10.4,15.1] (15.1,19.8] (19.8,24.5] (24.5,29.2] (29.2,33.9] + diff --git a/tests/testthat/_snaps/data_codebook.md b/tests/testthat/_snaps/data_codebook.md index c390ba890..8f9b9e7b5 100644 --- a/tests/testthat/_snaps/data_codebook.md +++ b/tests/testthat/_snaps/data_codebook.md @@ -139,7 +139,7 @@ # data_codebook efc Code - data_codebook(efc) + print(data_codebook(efc), table_width = Inf) Output efc (100 rows and 5 variables, 5 shown) @@ -162,10 +162,94 @@ 5 | neg_c_7 | Negative impact with 7 items | numeric | 3 (3.0%) | [7, 28] | | 97 --------------------------------------------------------------------------------------------------------------------------------------------- +--- + + Code + print(data_codebook(efc), table_width = "auto", remove_duplicates = FALSE) + Output + efc (100 rows and 5 variables, 5 shown) + + ID | Name | Label | Type + ---+----------+------------------------------------------+------------ + 1 | c12hour | average number of hours of care per week | numeric + 
---+----------+------------------------------------------+------------ + 2 | e16sex | elder's gender | numeric + | | | + ---+----------+------------------------------------------+------------ + 3 | e42dep | elder's dependency | categorical + | | | + | | | + | | | + ---+----------+------------------------------------------+------------ + 4 | c172code | carer's level of education | numeric + | | | + | | | + ---+----------+------------------------------------------+------------ + 5 | neg_c_7 | Negative impact with 7 items | numeric + ---------------------------------------------------------------------- + + ID | Missings | Values | Value Labels | N + ---+------------+----------+---------------------------------+----------- + 1 | 2 (2.0%) | [5, 168] | | 98 + ---+------------+----------+---------------------------------+----------- + 2 | 0 (0.0%) | 1 | male | 46 (46.0%) + | | 2 | female | 54 (54.0%) + ---+------------+----------+---------------------------------+----------- + 3 | 3 (3.0%) | 1 | independent | 2 ( 2.1%) + | | 2 | slightly dependent | 4 ( 4.1%) + | | 3 | moderately dependent | 28 (28.9%) + | | 4 | severely dependent | 63 (64.9%) + ---+------------+----------+---------------------------------+----------- + 4 | 10 (10.0%) | 1 | low level of education | 8 ( 8.9%) + | | 2 | intermediate level of education | 66 (73.3%) + | | 3 | high level of education | 16 (17.8%) + ---+------------+----------+---------------------------------+----------- + 5 | 3 (3.0%) | [7, 28] | | 97 + ------------------------------------------------------------------------- + +--- + + Code + print(data_codebook(efc), table_width = "auto", remove_duplicates = TRUE) + Output + efc (100 rows and 5 variables, 5 shown) + + ID | Name | Label | Type + ---+----------+------------------------------------------+------------ + 1 | c12hour | average number of hours of care per week | numeric + ---+----------+------------------------------------------+------------ + 2 | e16sex | elder's gender | 
numeric + ---+----------+------------------------------------------+------------ + 3 | e42dep | elder's dependency | categorical + ---+----------+------------------------------------------+------------ + 4 | c172code | carer's level of education | numeric + ---+----------+------------------------------------------+------------ + 5 | neg_c_7 | Negative impact with 7 items | numeric + ---------------------------------------------------------------------- + + ID | Missings | Values | Value Labels | N + ---+------------+----------+---------------------------------+----------- + 1 | 2 (2.0%) | [5, 168] | | 98 + ---+------------+----------+---------------------------------+----------- + 2 | 0 (0.0%) | 1 | male | 46 (46.0%) + | | 2 | female | 54 (54.0%) + ---+------------+----------+---------------------------------+----------- + 3 | 3 (3.0%) | 1 | independent | 2 ( 2.1%) + | | 2 | slightly dependent | 4 ( 4.1%) + | | 3 | moderately dependent | 28 (28.9%) + | | 4 | severely dependent | 63 (64.9%) + ---+------------+----------+---------------------------------+----------- + 4 | 10 (10.0%) | 1 | low level of education | 8 ( 8.9%) + | | 2 | intermediate level of education | 66 (73.3%) + | | 3 | high level of education | 16 (17.8%) + ---+------------+----------+---------------------------------+----------- + 5 | 3 (3.0%) | [7, 28] | | 97 + ------------------------------------------------------------------------- + # data_codebook efc, variable_label_width Code - data_codebook(efc, variable_label_width = 30) + print(out, table_width = Inf) Output efc (100 rows and 5 variables, 5 shown) @@ -189,10 +273,97 @@ 5 | neg_c_7 | Negative impact with 7 items | numeric | 3 (3.0%) | [7, 28] | | 97 --------------------------------------------------------------------------------------------------------------------------------- +--- + + Code + print(out, table_width = "auto", remove_duplicates = FALSE) + Output + efc (100 rows and 5 variables, 5 shown) + + ID | Name | Label | Type | 
Missings + ---+----------+------------------------------+-------------+----------- + 1 | c12hour | average number of hours of | numeric | 2 (2.0%) + | | care per week | | + ---+----------+------------------------------+-------------+----------- + 2 | e16sex | elder's gender | numeric | 0 (0.0%) + | | | | + ---+----------+------------------------------+-------------+----------- + 3 | e42dep | elder's dependency | categorical | 3 (3.0%) + | | | | + | | | | + | | | | + ---+----------+------------------------------+-------------+----------- + 4 | c172code | carer's level of education | numeric | 10 (10.0%) + | | | | + | | | | + ---+----------+------------------------------+-------------+----------- + 5 | neg_c_7 | Negative impact with 7 items | numeric | 3 (3.0%) + ----------------------------------------------------------------------- + + ID | Values | Value Labels | N + ---+----------+---------------------------------+----------- + 1 | [5, 168] | | 98 + | | | + ---+----------+---------------------------------+----------- + 2 | 1 | male | 46 (46.0%) + | 2 | female | 54 (54.0%) + ---+----------+---------------------------------+----------- + 3 | 1 | independent | 2 ( 2.1%) + | 2 | slightly dependent | 4 ( 4.1%) + | 3 | moderately dependent | 28 (28.9%) + | 4 | severely dependent | 63 (64.9%) + ---+----------+---------------------------------+----------- + 4 | 1 | low level of education | 8 ( 8.9%) + | 2 | intermediate level of education | 66 (73.3%) + | 3 | high level of education | 16 (17.8%) + ---+----------+---------------------------------+----------- + 5 | [7, 28] | | 97 + ------------------------------------------------------------ + +--- + + Code + print(out, table_width = "auto", remove_duplicates = TRUE) + Output + efc (100 rows and 5 variables, 5 shown) + + ID | Name | Label | Type | Missings + ---+----------+------------------------------+-------------+----------- + 1 | c12hour | average number of hours of | numeric | 2 (2.0%) + | | care per week | | + 
---+----------+------------------------------+-------------+----------- + 2 | e16sex | elder's gender | numeric | 0 (0.0%) + ---+----------+------------------------------+-------------+----------- + 3 | e42dep | elder's dependency | categorical | 3 (3.0%) + ---+----------+------------------------------+-------------+----------- + 4 | c172code | carer's level of education | numeric | 10 (10.0%) + ---+----------+------------------------------+-------------+----------- + 5 | neg_c_7 | Negative impact with 7 items | numeric | 3 (3.0%) + ----------------------------------------------------------------------- + + ID | Values | Value Labels | N + ---+----------+---------------------------------+----------- + 1 | [5, 168] | | 98 + ---+----------+---------------------------------+----------- + 2 | 1 | male | 46 (46.0%) + | 2 | female | 54 (54.0%) + ---+----------+---------------------------------+----------- + 3 | 1 | independent | 2 ( 2.1%) + | 2 | slightly dependent | 4 ( 4.1%) + | 3 | moderately dependent | 28 (28.9%) + | 4 | severely dependent | 63 (64.9%) + ---+----------+---------------------------------+----------- + 4 | 1 | low level of education | 8 ( 8.9%) + | 2 | intermediate level of education | 66 (73.3%) + | 3 | high level of education | 16 (17.8%) + ---+----------+---------------------------------+----------- + 5 | [7, 28] | | 97 + ------------------------------------------------------------ + # data_codebook efc, value_label_width Code - data_codebook(efc, variable_label_width = 30, value_label_width = 15) + print(out, table_width = Inf) Output efc (100 rows and 5 variables, 5 shown) @@ -216,6 +387,93 @@ 5 | neg_c_7 | Negative impact with 7 items | numeric | 3 (3.0%) | [7, 28] | | 97 ------------------------------------------------------------------------------------------------------------------ +--- + + Code + print(out, table_width = "auto", remove_duplicates = FALSE) + Output + efc (100 rows and 5 variables, 5 shown) + + ID | Name | Label | Type | 
Missings + ---+----------+------------------------------+-------------+----------- + 1 | c12hour | average number of hours of | numeric | 2 (2.0%) + | | care per week | | + ---+----------+------------------------------+-------------+----------- + 2 | e16sex | elder's gender | numeric | 0 (0.0%) + | | | | + ---+----------+------------------------------+-------------+----------- + 3 | e42dep | elder's dependency | categorical | 3 (3.0%) + | | | | + | | | | + | | | | + ---+----------+------------------------------+-------------+----------- + 4 | c172code | carer's level of education | numeric | 10 (10.0%) + | | | | + | | | | + ---+----------+------------------------------+-------------+----------- + 5 | neg_c_7 | Negative impact with 7 items | numeric | 3 (3.0%) + ----------------------------------------------------------------------- + + ID | Values | Value Labels | N + ---+----------+------------------+----------- + 1 | [5, 168] | | 98 + | | | + ---+----------+------------------+----------- + 2 | 1 | male | 46 (46.0%) + | 2 | female | 54 (54.0%) + ---+----------+------------------+----------- + 3 | 1 | independent | 2 ( 2.1%) + | 2 | slightly... | 4 ( 4.1%) + | 3 | moderately... | 28 (28.9%) + | 4 | severely... | 63 (64.9%) + ---+----------+------------------+----------- + 4 | 1 | low level of... | 8 ( 8.9%) + | 2 | intermediate... | 66 (73.3%) + | 3 | high level of... 
| 16 (17.8%) + ---+----------+------------------+----------- + 5 | [7, 28] | | 97 + --------------------------------------------- + +--- + + Code + print(out, table_width = "auto", remove_duplicates = TRUE) + Output + efc (100 rows and 5 variables, 5 shown) + + ID | Name | Label | Type | Missings + ---+----------+------------------------------+-------------+----------- + 1 | c12hour | average number of hours of | numeric | 2 (2.0%) + | | care per week | | + ---+----------+------------------------------+-------------+----------- + 2 | e16sex | elder's gender | numeric | 0 (0.0%) + ---+----------+------------------------------+-------------+----------- + 3 | e42dep | elder's dependency | categorical | 3 (3.0%) + ---+----------+------------------------------+-------------+----------- + 4 | c172code | carer's level of education | numeric | 10 (10.0%) + ---+----------+------------------------------+-------------+----------- + 5 | neg_c_7 | Negative impact with 7 items | numeric | 3 (3.0%) + ----------------------------------------------------------------------- + + ID | Values | Value Labels | N + ---+----------+------------------+----------- + 1 | [5, 168] | | 98 + ---+----------+------------------+----------- + 2 | 1 | male | 46 (46.0%) + | 2 | female | 54 (54.0%) + ---+----------+------------------+----------- + 3 | 1 | independent | 2 ( 2.1%) + | 2 | slightly... | 4 ( 4.1%) + | 3 | moderately... | 28 (28.9%) + | 4 | severely... | 63 (64.9%) + ---+----------+------------------+----------- + 4 | 1 | low level of... | 8 ( 8.9%) + | 2 | intermediate... | 66 (73.3%) + | 3 | high level of... 
| 16 (17.8%) + ---+----------+------------------+----------- + 5 | [7, 28] | | 97 + --------------------------------------------- + # data_codebook truncated data Code diff --git a/tests/testthat/_snaps/data_tabulate.md b/tests/testthat/_snaps/data_tabulate.md index 59a20dc01..ffde63088 100644 --- a/tests/testthat/_snaps/data_tabulate.md +++ b/tests/testthat/_snaps/data_tabulate.md @@ -259,7 +259,7 @@ Code print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", - include_na = FALSE)) + remove_na = TRUE)) Output efc$c172code | male | female | Total -------------+------------+------------+------ @@ -288,7 +288,7 @@ Code print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", - include_na = FALSE, weights = efc$weights)) + remove_na = TRUE, weights = efc$weights)) Output efc$c172code | male | female | Total -------------+------------+------------+------ @@ -317,7 +317,7 @@ Code print(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row", - include_na = FALSE)) + remove_na = TRUE)) Output c172code | male | female | Total ---------+------------+------------+------ @@ -348,7 +348,7 @@ Code print(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row", - include_na = FALSE, weights = efc$weights)) + remove_na = TRUE, weights = efc$weights)) Output c172code | male | female | Total ---------+------------+------------+------ @@ -378,7 +378,7 @@ Code print(data_tabulate(efc, "c172code", by = "e16sex", proportions = "column", - include_na = FALSE)) + remove_na = TRUE)) Output c172code | male | female | Total ---------+------------+------------+------ @@ -409,7 +409,7 @@ Code print(data_tabulate(efc, "c172code", by = "e16sex", proportions = "column", - include_na = FALSE, weights = "weights")) + remove_na = TRUE, weights = "weights")) Output c172code | male | female | Total ---------+------------+------------+------ @@ -497,7 +497,7 @@ Code print_md(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", - 
include_na = FALSE)) + remove_na = TRUE)) Output [1] "|efc$c172code | male| female| Total|" [2] "|:------------|----------:|----------:|-----:|" @@ -534,7 +534,7 @@ Code print_md(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", - include_na = FALSE, weights = efc$weights)) + remove_na = TRUE, weights = efc$weights)) Output [1] "|efc$c172code | male| female| Total|" [2] "|:------------|----------:|----------:|-----:|" diff --git a/tests/testthat/_snaps/data_to_numeric.md b/tests/testthat/_snaps/data_to_numeric.md index 42cb00b67..e963890a5 100644 --- a/tests/testthat/_snaps/data_to_numeric.md +++ b/tests/testthat/_snaps/data_to_numeric.md @@ -1,7 +1,7 @@ # convert data frame to numeric Code - to_numeric(head(ToothGrowth)) + to_numeric(head(ToothGrowth), dummy_factors = TRUE) Output len supp.OJ supp.VC dose 1 4.2 0 1 0.5 @@ -27,7 +27,7 @@ # convert factor to numeric Code - to_numeric(f) + to_numeric(f, dummy_factors = TRUE) Output a c i s t 1 0 0 0 1 0 diff --git a/tests/testthat/_snaps/demean.md b/tests/testthat/_snaps/demean.md index 7f12d263d..a1c2da4a3 100644 --- a/tests/testthat/_snaps/demean.md +++ b/tests/testthat/_snaps/demean.md @@ -23,13 +23,13 @@ Code head(x) Output - Sepal.Length_between Species_between binary_between Species_setosa_between - 1 5.925000 0.850000 0.375 0.4250000 - 2 5.925000 0.850000 0.375 0.4250000 - 3 5.925000 0.850000 0.375 0.4250000 - 4 5.862222 1.133333 0.400 0.2888889 - 5 5.925000 0.850000 0.375 0.4250000 - 6 5.862222 1.133333 0.400 0.2888889 + Sepal.Length_between binary_between Species_between Species_setosa_between + 1 5.925000 0.375 0.850000 0.4250000 + 2 5.925000 0.375 0.850000 0.4250000 + 3 5.925000 0.375 0.850000 0.4250000 + 4 5.862222 0.400 1.133333 0.2888889 + 5 5.925000 0.375 0.850000 0.4250000 + 6 5.862222 0.400 1.133333 0.2888889 Species_versicolor_between Species_virginica_between Sepal.Length_within 1 0.3000000 0.2750000 -0.8250000 2 0.3000000 0.2750000 -1.0250000 @@ -37,13 +37,13 @@ 4 0.2888889 
0.4222222 -1.2622222 5 0.3000000 0.2750000 -0.9250000 6 0.2888889 0.4222222 -0.4622222 - Species_within binary_within Species_setosa_within Species_versicolor_within - 1 -0.850000 -0.375 0.5750000 -0.3000000 - 2 -0.850000 0.625 0.5750000 -0.3000000 - 3 -0.850000 -0.375 0.5750000 -0.3000000 - 4 -1.133333 0.600 0.7111111 -0.2888889 - 5 -0.850000 0.625 0.5750000 -0.3000000 - 6 -1.133333 -0.400 0.7111111 -0.2888889 + binary_within Species_within Species_setosa_within Species_versicolor_within + 1 -0.375 -0.850000 0.5750000 -0.3000000 + 2 0.625 -0.850000 0.5750000 -0.3000000 + 3 -0.375 -0.850000 0.5750000 -0.3000000 + 4 0.600 -1.133333 0.7111111 -0.2888889 + 5 0.625 -0.850000 0.5750000 -0.3000000 + 6 -0.400 -1.133333 0.7111111 -0.2888889 Species_virginica_within 1 -0.2750000 2 -0.2750000 diff --git a/tests/testthat/test-categorize.R b/tests/testthat/test-categorize.R index 0e0b5d317..30453d9ad 100644 --- a/tests/testthat/test-categorize.R +++ b/tests/testthat/test-categorize.R @@ -1,5 +1,5 @@ set.seed(123) -d <- sample(1:10, size = 500, replace = TRUE) +d <- sample.int(10, size = 500, replace = TRUE) test_that("recode median", { expect_identical(categorize(d), ifelse(d >= median(d), 2, 1)) @@ -22,7 +22,7 @@ test_that("recode quantile", { }) set.seed(123) -d <- sample(1:100, size = 1000, replace = TRUE) +d <- sample.int(100, size = 1000, replace = TRUE) test_that("recode range", { expect_error(categorize(d, split = "range")) @@ -84,7 +84,7 @@ test_that("recode length", { }) set.seed(123) -x <- sample(1:10, size = 30, replace = TRUE) +x <- sample.int(10, size = 30, replace = TRUE) test_that("recode factor labels", { expect_type(categorize(x, "equal_length", n_groups = 3), "double") expect_s3_class(categorize(x, "equal_length", n_groups = 3, labels = c("low", "mid", "high")), "factor") @@ -232,3 +232,21 @@ test_that("categorize regex", { categorize(mtcars, select = "mpg") ) }) + + +# labelling ranges ------------------------------ +test_that("categorize labelling ranged", 
{ + data(mtcars) + expect_snapshot(categorize(mtcars$mpg, "equal_length", n_groups = 5)) + expect_snapshot(categorize(mtcars$mpg, "equal_length", n_groups = 5, labels = "range")) + expect_snapshot(categorize(mtcars$mpg, "equal_length", n_groups = 5, labels = "observed")) +}) + +test_that("categorize breaks", { + data(mtcars) + expect_snapshot(categorize(mtcars$mpg, "equal_length", n_groups = 5, labels = "range", breaks = "inclusive")) + expect_error( + categorize(mtcars$mpg, "equal_length", n_groups = 5, breaks = "something"), + regex = "should be one of" + ) +}) diff --git a/tests/testthat/test-center.R b/tests/testthat/test-center.R index 7bff1ebc9..e7e347848 100644 --- a/tests/testthat/test-center.R +++ b/tests/testthat/test-center.R @@ -169,8 +169,7 @@ test_that("center, factors (grouped data)", { poorman::ungroup() %>% poorman::pull(Species) - manual <- iris %>% - poorman::pull(Species) + manual <- poorman::pull(iris, Species) expect_identical(datawizard, manual) }) diff --git a/tests/testthat/test-coef_var.R b/tests/testthat/test-coef_var.R index a55eb7b96..2ae2275cd 100644 --- a/tests/testthat/test-coef_var.R +++ b/tests/testthat/test-coef_var.R @@ -29,10 +29,6 @@ test_that("coef_var: argument 'remove_na' works", { ) }) -test_that("coef_var: deprecation warning", { - expect_warning(coef_var(c(1:10, NA), na.rm = TRUE)) -}) - test_that("coef_var: method 'unbiased' needs argument 'n' when sigma and mu are provided", { expect_error( coef_var(1:10, method = "unbiased", mu = 10, sigma = 20), diff --git a/tests/testthat/test-data_codebook.R b/tests/testthat/test-data_codebook.R index 26a67ccf6..06e9bd2f9 100644 --- a/tests/testthat/test-data_codebook.R +++ b/tests/testthat/test-data_codebook.R @@ -19,7 +19,7 @@ test_that("data_codebook NaN and Inf", { set.seed(123) d <- data.frame( - x = c(sample(1:15, 100, TRUE), Inf, Inf) + x = c(sample.int(15, 100, TRUE), Inf, Inf) ) expect_snapshot(data_codebook(d)) expect_snapshot(data_codebook(d, range_at = 100)) @@ -38,24 
+38,32 @@ test_that("data_codebook iris, select, ID", { test_that("data_codebook efc", { - expect_snapshot(data_codebook(efc)) + expect_snapshot(print(data_codebook(efc), table_width = Inf)) + expect_snapshot(print(data_codebook(efc), table_width = "auto", remove_duplicates = FALSE)) + expect_snapshot(print(data_codebook(efc), table_width = "auto", remove_duplicates = TRUE)) }) test_that("data_codebook efc, variable_label_width", { - expect_snapshot(data_codebook(efc, variable_label_width = 30)) + out <- data_codebook(efc, variable_label_width = 30) + expect_snapshot(print(out, table_width = Inf)) + expect_snapshot(print(out, table_width = "auto", remove_duplicates = FALSE)) + expect_snapshot(print(out, table_width = "auto", remove_duplicates = TRUE)) }) test_that("data_codebook efc, value_label_width", { - expect_snapshot(data_codebook(efc, variable_label_width = 30, value_label_width = 15)) + out <- data_codebook(efc, variable_label_width = 30, value_label_width = 15) + expect_snapshot(print(out, table_width = Inf)) + expect_snapshot(print(out, table_width = "auto", remove_duplicates = FALSE)) + expect_snapshot(print(out, table_width = "auto", remove_duplicates = TRUE)) }) test_that("data_codebook truncated data", { set.seed(123) d <- data.frame( - a = sample(1:15, 100, TRUE), + a = sample.int(15, 100, TRUE), b = sample(letters[1:18], 100, TRUE), stringsAsFactors = FALSE ) @@ -66,7 +74,7 @@ test_that("data_codebook truncated data", { test_that("data_codebook mixed numeric lengths", { set.seed(123) d <- data.frame( - a = sample(1:4, 100, TRUE), + a = sample.int(4, 100, TRUE), b = sample(5:15, 100, TRUE), stringsAsFactors = FALSE ) @@ -76,7 +84,7 @@ test_that("data_codebook mixed numeric lengths", { test_that("data_codebook mixed range_at", { set.seed(123) d <- data.frame( - a = sample(1:4, 100, TRUE), + a = sample.int(4, 100, TRUE), b = sample(5:15, 100, TRUE), stringsAsFactors = FALSE ) @@ -87,7 +95,7 @@ test_that("data_codebook mixed range_at", { 
test_that("data_codebook logicals", { set.seed(123) d <- data.frame( - a = sample(1:15, 100, TRUE), + a = sample.int(15, 100, TRUE), b = sample(letters[1:3], 100, TRUE), c = sample(c(TRUE, FALSE), 100, TRUE), stringsAsFactors = FALSE @@ -99,14 +107,14 @@ test_that("data_codebook logicals", { test_that("data_codebook labelled data exceptions", { set.seed(123) - f1 <- sample(1:5, 100, TRUE) + f1 <- sample.int(5, 100, TRUE) f1[f1 == 4] <- NA attr(f1, "labels") <- setNames(1:5, c("One", "Two", "Three", "Four", "Five")) - f2 <- sample(1:5, 100, TRUE) + f2 <- sample.int(5, 100, TRUE) attr(f2, "labels") <- setNames(c(1:3, 5), c("One", "Two", "Three", "Five")) - f3 <- sample(1:5, 100, TRUE) + f3 <- sample.int(5, 100, TRUE) attr(f3, "labels") <- setNames(1:5, c("One", "Two", "Three", "Four", "Five")) d <- data.frame(f1, f2, f3) @@ -143,7 +151,7 @@ test_that("data_codebook works with numbers < 1", { test_that("data_codebook, big marks", { set.seed(123) f1 <- factor(sample(c("c", "b", "a"), 1e6, TRUE)) - f2 <- factor(sample(1:3, 1e6, TRUE)) + f2 <- factor(sample.int(3, 1e6, TRUE)) d <- data.frame(f1, f2) expect_snapshot(data_codebook(d)) }) diff --git a/tests/testthat/test-data_match.R b/tests/testthat/test-data_match.R index 75991b4b2..1a40f39fd 100644 --- a/tests/testthat/test-data_match.R +++ b/tests/testthat/test-data_match.R @@ -52,7 +52,7 @@ test_that("data_match works with missing data", { data.frame(c172code = 1, e16sex = 2), match = "not", return_indices = TRUE, - drop_na = FALSE + remove_na = FALSE )) expect_identical(x1, 41L) x1 <- length(data_match( @@ -60,7 +60,7 @@ test_that("data_match works with missing data", { data.frame(c172code = 1, e16sex = 2), match = "not", return_indices = TRUE, - drop_na = TRUE + remove_na = TRUE )) expect_identical(x1, 36L) }) diff --git a/tests/testthat/test-data_modify.R b/tests/testthat/test-data_modify.R index 9bb0a92d6..a7a153c43 100644 --- a/tests/testthat/test-data_modify.R +++ b/tests/testthat/test-data_modify.R @@ -353,6 
+353,16 @@ test_that("data_modify errors for non df", { }) +test_that("data_modify errors for empty data frames", { + data(mtcars) + x <- mtcars[1, ] + expect_error( + data_modify(x[-1, ], new_var = 5), + regex = "empty data frame" + ) +}) + + test_that("data_modify errors for non df", { data(efc) a <- "center(c22hour)" # <---------------- error in variable name @@ -492,6 +502,20 @@ test_that("data_modify works with functions that return character vectors", { }) +test_that("data_modify 1:n() and similar works in (grouped) data frames", { + data(mtcars) + out <- data_modify(mtcars, Trials = 1:n()) # nolint + expect_identical(out$Trials, 1:32) + x <- data_group(mtcars, "gear") + out <- data_modify(x, Trials = 1:n()) # nolint + expect_identical(out$Trials[out$gear == 3], 1:15) + expect_identical(out$Trials[out$gear == 4], 1:12) + out <- data_modify(x, Trials = 3:(n() + 2)) + expect_identical(out$Trials[out$gear == 3], 3:17) + expect_identical(out$Trials[out$gear == 4], 3:14) +}) + + test_that("data_modify .if/.at arguments", { data(iris) d <- iris[1:5, ] @@ -550,3 +574,31 @@ test_that("data_modify .if/.at arguments", { out <- data_modify(d, new_length = Petal.Length * 2, .if = is.numeric, .modify = round) expect_equal(out$new_length, c(3, 3, 3, 3, 3), ignore_attr = TRUE) }) + + +skip_if_not_installed("withr") + +withr::with_environment( + new.env(), + test_that("data_modify 1:n() and similar works in (grouped) data frames inside function calls", { + data(mtcars) + x <- data_group(mtcars, "gear") + + foo <- function(d) { + out <- data_modify(d, Trials = 1:n()) # nolint + out$Trials + } + expect_identical( + foo(x), + c( + 1L, 2L, 3L, 1L, 2L, 3L, 4L, 4L, 5L, 6L, 7L, 5L, 6L, 7L, 8L, + 9L, 10L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 11L, 1L, 2L, 3L, + 4L, 5L, 12L + ) + ) + }) +) + +test_that("data_modify errors on non-defined function", { + expect_error(data_modify(iris, Species = foo())) +}) diff --git a/tests/testthat/test-data_read.R b/tests/testthat/test-data_read.R 
index fd4884deb..15f1161d3 100644 --- a/tests/testthat/test-data_read.R +++ b/tests/testthat/test-data_read.R @@ -141,12 +141,12 @@ test_that("data_read - RDS file, matrix, coercible", { httr::stop_for_status(request) writeBin(httr::content(request, type = "raw"), temp_file) - expect_message(expect_message(expect_message({ + expect_message({ d <- data_read( temp_file, verbose = TRUE ) - })), regex = "0 out of 5") + }) expect_s3_class(d, "data.frame") expect_identical(dim(d), c(2L, 5L)) @@ -154,6 +154,42 @@ test_that("data_read - RDS file, matrix, coercible", { }) + +# RDS file, preserve class /types ----------------------------------- + +test_that("data_read - RDS file, preserve class", { + withr::with_tempfile("temp_file", fileext = ".rds", code = { + request <- httr::GET("https://raw.github.com/easystats/circus/main/data/hiv.rds") + httr::stop_for_status(request) + writeBin(httr::content(request, type = "raw"), temp_file) + + d <- data_read(temp_file) + expect_s3_class(d, "data.frame") + expect_identical( + sapply(d, class), + c( + village = "integer", outcome = "integer", distance = "numeric", + amount = "numeric", incentive = "integer", age = "integer", + hiv2004 = "integer", agecat = "factor" + ) + ) + }) +}) + + + +# RData ----------------------------------- + +test_that("data_read - no warning for RData", { + withr::with_tempfile("temp_file", fileext = ".RData", code = { + data(mtcars) + save(mtcars, file = temp_file) + expect_silent(data_read(temp_file, verbose = FALSE)) + }) +}) + + + # SPSS file ----------------------------------- test_that("data_read - SPSS file", { diff --git a/tests/testthat/test-data_rename.R b/tests/testthat/test-data_rename.R index a8d003b59..e01c42f8b 100644 --- a/tests/testthat/test-data_rename.R +++ b/tests/testthat/test-data_rename.R @@ -14,6 +14,10 @@ test_that("data_rename works with one or several replacements", { ), c("length", "width", "Petal.Length", "Petal.Width", "Species") ) + expect_named( + data_rename(test, c(length 
= "Sepal.Length", width = "Sepal.Width")), + c("length", "width", "Petal.Length", "Petal.Width", "Species") + ) }) test_that("data_rename returns a data frame", { @@ -24,11 +28,26 @@ test_that("data_rename returns a data frame", { test_that("data_rename: pattern must be of type character", { expect_error( data_rename(test, pattern = 1), - regexp = "Argument `pattern` must be of type character." + regexp = "Argument `pattern` must be of type character" ) expect_error( data_rename(test, pattern = TRUE), - regexp = "Argument `pattern` must be of type character." + regexp = "Argument `pattern` must be of type character" + ) +}) + +test_that("data_rename: replacement not allowed to have NA or empty strings", { + expect_error( + data_rename(test, pattern = c(test = "Species", "Sepal.Length")), + regexp = "Either name all elements of `pattern`" + ) + expect_error( + data_rename( + test, + pattern = c("Species", "Sepal.Length"), + replacement = c("foo", NA_character_) + ), + regexp = "`replacement` is not allowed" ) }) @@ -42,7 +61,9 @@ test_that("data_rename uses indices when no replacement", { test_that("data_rename works when too many names in 'replacement'", { expect_message( - x <- data_rename(test, replacement = paste0("foo", 1:6)), + { + x <- data_rename(test, replacement = paste0("foo", 1:6)) + }, "There are more names in" ) expect_identical(dim(test), dim(x)) @@ -51,7 +72,9 @@ test_that("data_rename works when too many names in 'replacement'", { test_that("data_rename works when not enough names in 'replacement'", { expect_message( - x <- data_rename(test, replacement = paste0("foo", 1:2)), + { + x <- data_rename(test, replacement = paste0("foo", 1:2)) + }, "There are more names in" ) expect_identical(dim(test), dim(x)) diff --git a/tests/testthat/test-data_summary.R b/tests/testthat/test-data_summary.R index 746d4c51a..c60b142d2 100644 --- a/tests/testthat/test-data_summary.R +++ b/tests/testthat/test-data_summary.R @@ -175,7 +175,7 @@ test_that("data_summary, 
with NA", { data(efc, package = "datawizard") out <- data_summary(efc, MW = mean(c12hour, na.rm = TRUE), by = "c172code") expect_snapshot(print(out)) - out <- data_summary(efc, MW = mean(c12hour, na.rm = TRUE), by = "c172code", include_na = FALSE) + out <- data_summary(efc, MW = mean(c12hour, na.rm = TRUE), by = "c172code", remove_na = TRUE) expect_snapshot(print(out)) # sorting for multiple groups out <- data_summary(efc, MW = mean(c12hour, na.rm = TRUE), by = c("e42dep", "c172code")) diff --git a/tests/testthat/test-data_tabulate.R b/tests/testthat/test-data_tabulate.R index 39f5d44c6..9848d42b9 100644 --- a/tests/testthat/test-data_tabulate.R +++ b/tests/testthat/test-data_tabulate.R @@ -81,7 +81,7 @@ test_that("data_tabulate data.frame", { "Variable", "Value", "N", "Raw %", "Valid %", "Cumulative %" ), - class = c("dw_data_tabulate", "data.frame"), + class = c("datawizard_table", "data.frame"), row.names = 1:3, type = "numeric", varname = "e16sex", @@ -99,7 +99,7 @@ test_that("data_tabulate data.frame", { "Variable", "Value", "N", "Raw %", "Valid %", "Cumulative %" ), - class = c("dw_data_tabulate", "data.frame"), + class = c("datawizard_table", "data.frame"), row.names = 1:4, type = "numeric", varname = "c172code", @@ -139,7 +139,7 @@ test_that("data_tabulate print", { attributes(out), list( names = c("Variable", "Value", "N", "Raw %", "Valid %", "Cumulative %"), - class = c("dw_data_tabulate", "data.frame"), + class = c("datawizard_table", "data.frame"), row.names = 1:4, type = "integer", varname = "Large Number", @@ -197,7 +197,7 @@ test_that("data_tabulate grouped data.frame", { "Valid %", "Cumulative %" ), - class = c("dw_data_tabulate", "data.frame"), + class = c("datawizard_table", "data.frame"), row.names = 1:4, type = "numeric", varname = "c172code", @@ -268,6 +268,7 @@ test_that("data_tabulate drop levels", { # select helpers ------------------------------ + test_that("data_tabulate regex", { data(mtcars) expect_identical( @@ -286,16 +287,17 @@ 
test_that("data_tabulate exclude/include missing values", { efc$e16sex[sample.int(nrow(efc), 5)] <- NA out <- data_tabulate(efc$c172code) expect_identical(out$N, c(8L, 66L, 16L, 10L)) - out <- data_tabulate(efc$c172code, include_na = FALSE) + out <- data_tabulate(efc$c172code, remove_na = TRUE) expect_identical(out$N, c(8L, 66L, 16L)) out <- data_tabulate(efc$c172code, weights = efc$weights) expect_identical(out$N, c(10, 67, 15, 13)) - out <- data_tabulate(efc$c172code, include_na = FALSE, weights = efc$weights) + out <- data_tabulate(efc$c172code, remove_na = TRUE, weights = efc$weights) expect_identical(out$N, c(10, 67, 15)) }) # cross tables ------------------------------ + test_that("data_tabulate, cross tables", { data(efc, package = "datawizard") set.seed(123) @@ -303,17 +305,17 @@ test_that("data_tabulate, cross tables", { efc$e16sex[sample.int(nrow(efc), 5)] <- NA expect_snapshot(print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full"))) - expect_snapshot(print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", include_na = FALSE))) + expect_snapshot(print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", remove_na = TRUE))) expect_snapshot(print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", weights = efc$weights))) - expect_snapshot(print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", include_na = FALSE, weights = efc$weights))) # nolint + expect_snapshot(print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", remove_na = TRUE, weights = efc$weights))) # nolint expect_snapshot(print(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row"))) - expect_snapshot(print(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row", include_na = FALSE))) + expect_snapshot(print(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row", remove_na = TRUE))) expect_snapshot(print(data_tabulate(efc, "c172code", by = efc$e16sex, 
proportions = "row", weights = efc$weights))) - expect_snapshot(print(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row", include_na = FALSE, weights = efc$weights))) # nolint + expect_snapshot(print(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row", remove_na = TRUE, weights = efc$weights))) # nolint expect_snapshot(print(data_tabulate(efc, "c172code", by = "e16sex", proportions = "column"))) - expect_snapshot(print(data_tabulate(efc, "c172code", by = "e16sex", proportions = "column", include_na = FALSE))) + expect_snapshot(print(data_tabulate(efc, "c172code", by = "e16sex", proportions = "column", remove_na = TRUE))) expect_snapshot(print(data_tabulate(efc, "c172code", by = "e16sex", proportions = "column", weights = "weights"))) - expect_snapshot(print(data_tabulate(efc, "c172code", by = "e16sex", proportions = "column", include_na = FALSE, weights = "weights"))) # nolint + expect_snapshot(print(data_tabulate(efc, "c172code", by = "e16sex", proportions = "column", remove_na = TRUE, weights = "weights"))) # nolint }) test_that("data_tabulate, cross tables, HTML", { @@ -324,11 +326,11 @@ test_that("data_tabulate, cross tables, HTML", { efc$e16sex[sample.int(nrow(efc), 5)] <- NA expect_s3_class(print_html(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full")), "gt_tbl") - expect_s3_class(print_html(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", include_na = FALSE)), "gt_tbl") # nolint + expect_s3_class(print_html(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", remove_na = TRUE)), "gt_tbl") # nolint expect_s3_class(print_html(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", weights = efc$weights)), "gt_tbl") # nolint - expect_s3_class(print_html(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", include_na = FALSE, weights = efc$weights)), "gt_tbl") # nolint + expect_s3_class(print_html(data_tabulate(efc$c172code, by = efc$e16sex, proportions = 
"full", remove_na = TRUE, weights = efc$weights)), "gt_tbl") # nolint expect_s3_class(print_html(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row")), "gt_tbl") - expect_s3_class(print_html(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row", include_na = FALSE, weights = efc$weights)), "gt_tbl") # nolint + expect_s3_class(print_html(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row", remove_na = TRUE, weights = efc$weights)), "gt_tbl") # nolint }) test_that("data_tabulate, cross tables, grouped df", { @@ -375,23 +377,24 @@ test_that("data_tabulate, cross tables, markdown", { efc$e16sex[sample.int(nrow(efc), 5)] <- NA expect_snapshot(print_md(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full"))) - expect_snapshot(print_md(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", include_na = FALSE))) + expect_snapshot(print_md(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", remove_na = TRUE))) expect_snapshot(print_md(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", weights = efc$weights))) - expect_snapshot(print_md(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", include_na = FALSE, weights = efc$weights))) # nolint + expect_snapshot(print_md(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", remove_na = TRUE, weights = efc$weights))) # nolint }) + # validate against table ------------------------- test_that("data_tabulate, validate against table", { data(mtcars) # frequency table out1 <- as.data.frame(table(mtcars$cyl)) - out2 <- data_tabulate(mtcars$cyl, include_na = FALSE) + out2 <- data_tabulate(mtcars$cyl, remove_na = TRUE) expect_identical(out1$Freq, out2$N) # crosstable out1 <- data_arrange(as.data.frame(table(mtcars$cyl, mtcars$gear)), c("Var1", "Var2")) out2 <- data_rename(data_to_long( - as.data.frame(data_tabulate(mtcars$cyl, by = mtcars$gear, include_na = FALSE)), 2:4, + 
as.data.frame(data_tabulate(mtcars$cyl, by = mtcars$gear, remove_na = TRUE)), 2:4, names_to = "Var2", values_to = "Freq" ), "mtcars$cyl", "Var1") out1[[2]] <- as.character(out1[[2]]) @@ -405,3 +408,68 @@ test_that("data_tabulate, correct 0% for proportions", { expect_identical(format(out[[1]])[[4]], c("0 (0%)", "0 (0%)", "0 (0%)", "0 (0%)", "", "0")) expect_snapshot(print(out[[1]])) }) + + +# coercing to data frame ------------------------- + +test_that("data_tabulate, as.data.frame, frequency tables", { + data(mtcars) + # frequency table + x <- data_tabulate(mtcars$cyl) + out <- as.data.frame(x) + expect_named(out, c("Variable", "Value", "N", "Raw %", "Valid %", "Cumulative %")) + expect_identical(out$Variable, c("mtcars$cyl", "mtcars$cyl", "mtcars$cyl", "mtcars$cyl")) + expect_false(any(vapply(out[2:ncol(out)], is.character, logical(1)))) + # frequency tables + x <- data_tabulate(mtcars, select = c("cyl", "am")) + out <- as.data.frame(x) + expect_named(out, c("var", "table")) + expect_equal(vapply(out, class, character(1)), c("character", "AsIs"), ignore_attr = TRUE) + expect_length(out$table, 2L) + expect_named(out$table[[1]], c("Variable", "Value", "N", "Raw %", "Valid %", "Cumulative %")) + expect_identical(out$table[[1]]$Variable, c("cyl", "cyl", "cyl", "cyl")) + expect_false(any(vapply(out$table[[1]][2:ncol(out$table[[1]])], is.character, logical(1)))) +}) + + +test_that("data_tabulate, as.data.frame, cross tables", { + data(mtcars) + # cross table + x <- data_tabulate(mtcars, "cyl", by = "am") + out <- as.data.frame(x) + expect_named(out, c("var", "table")) + expect_equal(vapply(out, class, character(1)), c("character", "AsIs"), ignore_attr = TRUE) + expect_length(out$table, 1L) + expect_named(out$table[[1]], c("cyl", "0", "1", "NA")) + expect_identical(nrow(out$table[[1]]), 4L) + # cross tables + x <- data_tabulate(mtcars, c("cyl", "vs"), by = "am") + out <- as.data.frame(x) + expect_named(out, c("var", "table")) + expect_equal(vapply(out, class, 
character(1)), c("character", "AsIs"), ignore_attr = TRUE) + expect_length(out$table, 2L) + expect_named(out$table[[1]], c("cyl", "0", "1", "NA")) + expect_identical(nrow(out$table[[1]]), 4L) +}) + + +test_that("data_tabulate, as.data.frame, cross tables with total N", { + # cross table, with total + x <- data_tabulate(mtcars, "cyl", by = "am") + out <- as.data.frame(x, add_total = TRUE) + expect_named(out, c("var", "table")) + expect_equal(vapply(out, class, character(1)), c("character", "AsIs"), ignore_attr = TRUE) + expect_length(out$table, 1L) + expect_named(out$table[[1]], c("cyl", "0", "1", "", "Total")) + expect_identical(nrow(out$table[[1]]), 5L) + expect_identical(out$table[[1]]$cyl, c("4", "6", "8", NA, "Total")) + # cross tables, with total + x <- data_tabulate(mtcars, c("cyl", "vs"), by = "am") + out <- as.data.frame(x, add_total = TRUE) + expect_named(out, c("var", "table")) + expect_equal(vapply(out, class, character(1)), c("character", "AsIs"), ignore_attr = TRUE) + expect_length(out$table, 2L) + expect_named(out$table[[1]], c("cyl", "0", "1", "", "Total")) + expect_identical(nrow(out$table[[1]]), 5L) + expect_identical(out$table[[1]]$cyl, c("4", "6", "8", NA, "Total")) +}) diff --git a/tests/testthat/test-data_to_numeric.R b/tests/testthat/test-data_to_numeric.R index 464c35e8d..816591ac0 100644 --- a/tests/testthat/test-data_to_numeric.R +++ b/tests/testthat/test-data_to_numeric.R @@ -1,5 +1,5 @@ test_that("convert data frame to numeric", { - expect_snapshot(to_numeric(head(ToothGrowth))) + expect_snapshot(to_numeric(head(ToothGrowth), dummy_factors = TRUE)) expect_snapshot(to_numeric(head(ToothGrowth), dummy_factors = FALSE)) }) @@ -41,7 +41,7 @@ test_that("convert character to numeric lowest", { test_that("convert factor to numeric", { f <- factor(substring("statistics", 1:10, 1:10)) - expect_snapshot(to_numeric(f)) + expect_snapshot(to_numeric(f, dummy_factors = TRUE)) }) test_that("convert factor to numeric", { @@ -67,12 +67,12 @@ 
test_that("convert factor to numeric, dummy factors", { test_that("convert factor to numeric, append", { data(efc) expect_identical( - colnames(to_numeric(efc)), + colnames(to_numeric(efc, dummy_factors = TRUE)), c("c12hour", "e16sex", "e42dep.1", "e42dep.2", "e42dep.3", "e42dep.4", "c172code", "neg_c_7"), ignore_attr = TRUE ) expect_identical( - colnames(to_numeric(efc, append = TRUE)), + colnames(to_numeric(efc, dummy_factors = TRUE, append = TRUE)), c( "c12hour", "e16sex", "e42dep", "c172code", "neg_c_7", "e42dep_n", "e42dep_n.1", "e42dep_n.2", "e42dep_n.3", "e42dep_n.4" diff --git a/tests/testthat/test-demean.R b/tests/testthat/test-demean.R index 566bd6097..6e169f9c0 100644 --- a/tests/testthat/test-demean.R +++ b/tests/testthat/test-demean.R @@ -57,8 +57,174 @@ test_that("demean shows message if some vars don't exist", { ) set.seed(123) - expect_message( + expect_error( demean(dat, select = "foo", by = "ID"), regexp = "not found" ) }) + + +# see issue #520 +test_that("demean for cross-classified designs (by > 1)", { + skip_if_not_installed("poorman") + + data(efc, package = "datawizard") + dat <- na.omit(efc) + dat$e42dep <- factor(dat$e42dep) + dat$c172code <- factor(dat$c172code) + + x2a <- dat %>% + data_group(e42dep) %>% + data_modify( + c12hour_e42dep = mean(c12hour) + ) %>% + data_ungroup() %>% + data_group(c172code) %>% + data_modify( + c12hour_c172code = mean(c12hour) + ) %>% + data_ungroup() %>% + data_modify( + c12hour_within = c12hour - c12hour_e42dep - c12hour_c172code + ) + + out <- degroup( + dat, + select = "c12hour", + by = c("e42dep", "c172code"), + suffix_demean = "_within" + ) + + expect_equal( + out$c12hour_e42dep_between, + x2a$c12hour_e42dep, + tolerance = 1e-4, + ignore_attr = TRUE + ) + expect_equal( + out$c12hour_within, + x2a$c12hour_within, + tolerance = 1e-4, + ignore_attr = TRUE + ) + + x2a <- dat %>% + data_group(e42dep) %>% + data_modify( + c12hour_e42dep = mean(c12hour, na.rm = TRUE), + neg_c_7_e42dep = mean(neg_c_7, na.rm = 
TRUE) + ) %>% + data_ungroup() %>% + data_group(c172code) %>% + data_modify( + c12hour_c172code = mean(c12hour, na.rm = TRUE), + neg_c_7_c172code = mean(neg_c_7, na.rm = TRUE) + ) %>% + data_ungroup() %>% + data_modify( + c12hour_within = c12hour - c12hour_e42dep - c12hour_c172code, + neg_c_7_within = neg_c_7 - neg_c_7_e42dep - neg_c_7_c172code + ) + + out <- degroup( + dat, + select = c("c12hour", "neg_c_7"), + by = c("e42dep", "c172code"), + suffix_demean = "_within" + ) + + expect_equal( + out$c12hour_e42dep_between, + x2a$c12hour_e42dep, + tolerance = 1e-4, + ignore_attr = TRUE + ) + expect_equal( + out$neg_c_7_c172code_between, + x2a$neg_c_7_c172code, + tolerance = 1e-4, + ignore_attr = TRUE + ) + expect_equal( + out$neg_c_7_within, + x2a$neg_c_7_within, + tolerance = 1e-4, + ignore_attr = TRUE + ) + expect_equal( + out$c12hour_within, + x2a$c12hour_within, + tolerance = 1e-4, + ignore_attr = TRUE + ) +}) + + +test_that("demean, sanity checks", { + data(efc, package = "datawizard") + dat <- na.omit(efc) + dat$e42dep <- factor(dat$e42dep) + dat$c172code <- factor(dat$c172code) + + expect_error( + degroup( + dat, + select = c("c12hour", "neg_c_8"), + by = c("e42dep", "c172code"), + suffix_demean = "_within" + ), + regex = "Variable \"neg_c_8\" was not found" + ) + expect_error( + degroup( + dat, + select = c("c12hour", "neg_c_8"), + by = c("e42dep", "c173code"), + suffix_demean = "_within" + ), + regex = "Variables \"neg_c_8\" and \"c173code\" were not found" + ) +}) + + +test_that("demean for nested designs (by > 1), nested = TRUE", { + data(efc, package = "datawizard") + dat <- na.omit(efc) + dat$e42dep <- factor(dat$e42dep) + dat$c172code <- factor(dat$c172code) + + x_ijk <- dat$c12hour + xbar_k <- ave(x_ijk, dat$e42dep, FUN = mean) + xbar_jk <- ave(x_ijk, dat$e42dep, dat$c172code, FUN = mean) + + L3_between <- xbar_k + L2_between <- xbar_jk - xbar_k + L1_within <- x_ijk - xbar_jk + + out <- degroup( + dat, + select = "c12hour", + by = c("e42dep", 
"c172code"), + nested = TRUE, + suffix_demean = "_within" + ) + + expect_equal( + out$c12hour_within, + L1_within, + tolerance = 1e-4, + ignore_attr = TRUE + ) + expect_equal( + out$c12hour_e42dep_between, + L3_between, + tolerance = 1e-4, + ignore_attr = TRUE + ) + expect_equal( + out$c12hour_c172code_between, + L2_between, + tolerance = 1e-4, + ignore_attr = TRUE + ) +}) diff --git a/tests/testthat/test-describe_distribution.R b/tests/testthat/test-describe_distribution.R index 83d2abb33..dfa7bf617 100644 --- a/tests/testthat/test-describe_distribution.R +++ b/tests/testthat/test-describe_distribution.R @@ -286,3 +286,16 @@ test_that("describe_distribution formatting", { x <- describe_distribution(iris$Sepal.Width, quartiles = TRUE) expect_snapshot(format(x)) }) + +# other ----------------------------------- + +test_that("return NA in CI if sample is too sparse", { + skip_if_not_installed("bayestestR") + set.seed(123456) + expect_warning( + res <- describe_distribution(mtcars[mtcars$cyl == "6", ], wt, centrality = "map", ci = 0.95), # nolint + "When bootstrapping CIs, sample was too sparse to find TD" + ) + expect_identical(res$CI_low, NA) + expect_identical(res$CI_high, NA) +}) diff --git a/tests/testthat/test-mean_sd.R b/tests/testthat/test-mean_sd.R index e0af8a0f1..3e0829fb1 100644 --- a/tests/testthat/test-mean_sd.R +++ b/tests/testthat/test-mean_sd.R @@ -15,8 +15,3 @@ test_that("mean_sd", { expect_equal(unname(diff(msd2)), rep(sd(mtcars[["mpg"]]), 6), tolerance = 0.00001) expect_named(msd2, c("-3 SD", "-2 SD", "-1 SD", "Mean", "+1 SD", "+2 SD", "+3 SD")) }) - -test_that("deprecation warning for `na.rm`", { - expect_warning(mean_sd(c(-1, 0, 1, NA), na.rm = TRUE)) - expect_warning(median_mad(c(-1, 0, 1, 2, 3, NA), na.rm = TRUE)) -}) diff --git a/tests/testthat/test-row_count.R b/tests/testthat/test-row_count.R new file mode 100644 index 000000000..0c7d67691 --- /dev/null +++ b/tests/testthat/test-row_count.R @@ -0,0 +1,57 @@ +test_that("row_count", { + d_mn 
<- data.frame( + c1 = c(1, 2, NA, 4), + c2 = c(NA, 2, NA, 5), + c3 = c(NA, 4, NA, NA), + c4 = c(2, 3, 7, 8) + ) + expect_identical(row_count(d_mn, count = 2), c(1, 2, 0, 0)) + expect_identical(row_count(d_mn, count = NA), c(2, 0, 3, 1)) + d_mn <- data.frame( + c1 = c("a", "b", NA, "c"), + c2 = c(NA, "b", NA, "d"), + c3 = c(NA, 4, NA, NA), + c4 = c(2, 3, 7, Inf), + stringsAsFactors = FALSE + ) + expect_identical(row_count(d_mn, count = "b"), c(0, 2, 0, 0)) + expect_identical(row_count(d_mn, count = Inf), c(0, 0, 0, 1)) +}) + +test_that("row_count, errors or messages", { + data(iris) + expect_error(expect_warning(row_count(iris, select = "abc")), regex = "must be a valid") + expect_error(expect_warning(row_count(iris, select = "abc", count = 3)), regex = "No columns") + expect_error(row_count(iris[1], count = 3), regex = "with at least") + expect_error(row_count(iris[-seq_len(nrow(iris)), , drop = FALSE], count = 2), regex = "one row") +}) + +test_that("row_count, allow_coercion match", { + d_mn <- data.frame( + c1 = c("1", "2", NA, "3"), + c2 = c(NA, "2", NA, "3"), + c3 = c(NA, 4, NA, NA), + c4 = c(2, 3, 7, Inf), + stringsAsFactors = FALSE + ) + expect_identical(row_count(d_mn, count = 2, allow_coercion = TRUE), c(1, 2, 0, 0)) + expect_identical(row_count(d_mn, count = 2, allow_coercion = FALSE), c(1, 0, 0, 0)) + expect_identical(row_count(d_mn, count = "2", allow_coercion = FALSE), c(0, 2, 0, 0)) + expect_identical(row_count(d_mn, count = factor("2"), allow_coercion = TRUE), c(1, 2, 0, 0)) + expect_error(row_count(d_mn, count = factor("2"), allow_coercion = FALSE), regex = "No column has") + + # mix character / factor + d_mn <- data.frame( + c1 = factor(c("1", "2", NA, "3")), + c2 = c("2", "1", NA, "3"), + c3 = c(NA, 4, NA, NA), + c4 = c(2, 3, 7, Inf), + stringsAsFactors = FALSE + ) + expect_identical(row_count(d_mn, count = 2, allow_coercion = TRUE), c(2, 1, 0, 0)) + expect_identical(row_count(d_mn, count = 2, allow_coercion = FALSE), c(1, 0, 0, 0)) + 
expect_identical(row_count(d_mn, count = "2", allow_coercion = FALSE), c(1, 0, 0, 0)) + expect_identical(row_count(d_mn, count = "2", allow_coercion = TRUE), c(2, 1, 0, 0)) + expect_identical(row_count(d_mn, count = factor("2"), allow_coercion = FALSE), c(0, 1, 0, 0)) + expect_identical(row_count(d_mn, count = factor("2"), allow_coercion = TRUE), c(2, 1, 0, 0)) +}) diff --git a/tests/testthat/test-row_means.R b/tests/testthat/test-row_means.R index 8d0504c69..4db0d7039 100644 --- a/tests/testthat/test-row_means.R +++ b/tests/testthat/test-row_means.R @@ -1,4 +1,4 @@ -test_that("row_means", { +test_that("row_means/sums", { d_mn <- data.frame( c1 = c(1, 2, NA, 4), c2 = c(NA, 2, NA, 5), @@ -14,14 +14,21 @@ test_that("row_means", { expect_equal(row_means(d_mn, min_valid = 2, digits = 1), c(1.5, 2.8, NA, 5.7), tolerance = 1e-1) expect_message(row_means(iris), regex = "Only numeric") expect_equal(row_means(iris, verbose = FALSE), rowMeans(iris[, 1:4]), tolerance = 1e-3, ignore_attr = TRUE) + expect_equal(row_sums(d_mn, min_valid = 4), c(NA, 11, NA, NA), tolerance = 1e-3) + expect_equal(row_sums(d_mn, min_valid = 3), c(NA, 11, NA, 17), tolerance = 1e-3) + expect_message(row_sums(iris), regex = "Only numeric") }) -test_that("row_means, errors or messages", { +test_that("row_means/sums, errors or messages", { data(iris) expect_error(expect_warning(row_means(iris, select = "abc")), regex = "No columns") + expect_error(expect_warning(row_sums(iris, select = "abc")), regex = "No columns") expect_error(row_means(iris[1], min_valid = 1), regex = "two numeric") expect_error(row_means(iris, min_valid = 1:4), regex = "numeric value") expect_error(row_means(iris, min_valid = "a"), regex = "numeric value") expect_message(row_means(iris[1:3, ], min_valid = 3), regex = "Only numeric") expect_silent(row_means(iris[1:3, ], min_valid = 3, verbose = FALSE)) + expect_error(row_sums(iris[1], min_valid = 1), regex = "two numeric") + expect_message(row_sums(iris[1:3, ], min_valid = 3), regex = 
"Only numeric") + expect_silent(row_sums(iris[1:3, ], min_valid = 3, verbose = FALSE)) }) diff --git a/tests/testthat/test-select_nse.R b/tests/testthat/test-select_nse.R index c0195ad94..fb0f6aefb 100644 --- a/tests/testthat/test-select_nse.R +++ b/tests/testthat/test-select_nse.R @@ -138,3 +138,24 @@ test_that(".select_nse: works with function and namespace", { out <- fun(insight::find_predictors(model, effects = "fixed", flatten = TRUE)) expect_identical(out, iris["Petal.Width"]) }) + +test_that(".select_nse: allow character vector with :", { + data(mtcars) + out <- data_select(mtcars, c("cyl:hp", "wt", "vs:gear")) + expect_named(out, c("cyl", "disp", "hp", "wt", "vs", "am", "gear")) + out <- data_select(mtcars, c("cyl:hp", "wta", "vs:gear")) + expect_named(out, c("cyl", "disp", "hp", "vs", "am", "gear")) + out <- data_select(mtcars, c("hp:cyl", "wta", "vs:gear")) + expect_named(out, c("hp", "disp", "cyl", "vs", "am", "gear")) + out <- data_select(mtcars, c("cyl:hq", "wt", "vs:gear")) + expect_named(out, c("wt", "vs", "am", "gear")) + + expect_warning( + center(mtcars, c("cyl:hp", "wta", "vs:gear"), verbose = TRUE), + regex = "Did you mean \"wt\"" + ) + expect_warning( + center(mtcars, c("cyl:hq", "wt", "vs:gear"), verbose = TRUE), + regex = "Did you mean one of \"hp\"" + ) +}) diff --git a/tests/testthat/test-standardize_models.R b/tests/testthat/test-standardize_models.R index 706a4e6e7..d61caf450 100644 --- a/tests/testthat/test-standardize_models.R +++ b/tests/testthat/test-standardize_models.R @@ -31,6 +31,29 @@ test_that("standardize | errors", { }) +test_that("standardize | problematic formulas", { + data(mtcars) + m <- lm(mpg ~ hp, data = mtcars) + expect_equal( + coef(standardise(m)), + c(`(Intercept)` = -3.14935717633686e-17, hp = -0.776168371826586), + tolerance = 1e-4 + ) + + colnames(mtcars)[1] <- "1_mpg" + m <- lm(`1_mpg` ~ hp, data = mtcars) + expect_error(standardise(m), regex = "Looks like") + + # works interactive only + # data(mtcars) + # m <- 
lm(mtcars$mpg ~ mtcars$hp) + # expect_error(standardise(m), regex = "model formulas") + + m <- lm(mtcars[, 1] ~ hp, data = mtcars) + expect_error(standardise(m), regex = "indexed data") +}) + + # Transformations --------------------------------------------------------- test_that("transformations", { skip_if_not_installed("effectsize") @@ -206,15 +229,14 @@ test_that("standardize non-Gaussian response", { # variables evaluated in the environment $$$ ------------------------------ test_that("variables evaluated in the environment", { m <- lm(mtcars$mpg ~ mtcars$cyl + am, data = mtcars) - w <- capture_warnings(standardize(m)) - expect_true(any(grepl("mtcars$mpg", w, fixed = TRUE))) + w <- capture_error(standardize(m)) + expect_true(any(grepl("Using `$`", w, fixed = TRUE))) ## Note: # No idea why this is suddenly not giving a warning on older R versions. m <- lm(mtcars$mpg ~ mtcars$cyl + mtcars$am, data = mtcars) - warns <- capture_warnings(standardize(m)) - expect_true(any(grepl("mtcars$mpg", warns, fixed = TRUE))) - expect_true(any(grepl("No variables", warns, fixed = TRUE))) + w <- capture_error(standardize(m)) + expect_true(any(grepl("Using `$`", w, fixed = TRUE))) }) diff --git a/vignettes/overview_of_vignettes.Rmd b/vignettes/overview_of_vignettes.Rmd new file mode 100644 index 000000000..033234607 --- /dev/null +++ b/vignettes/overview_of_vignettes.Rmd @@ -0,0 +1,37 @@ +--- +title: "Overview of Vignettes" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Overview of Vignettes} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r message=FALSE, warning=FALSE, include=FALSE} +library(knitr) +knitr::opts_chunk$set( + echo = TRUE, + collapse = TRUE, + warning = FALSE, + message = FALSE, + comment = "#>", + eval = TRUE +) +``` + +All package vignettes are available at [https://easystats.github.io/datawizard/](https://easystats.github.io/datawizard/). 
+ +## Function Overview + +* [Function Reference](https://easystats.github.io/datawizard/reference/index.html) + + +## Data Preparation + +* [Coming from 'tidyverse'](https://easystats.github.io/datawizard/articles/tidyverse_translation.html) +* [A quick summary of selection syntax in `{datawizard}`](https://easystats.github.io/datawizard/articles/selection_syntax.html) + + +## Statistical Transformations + +* [Data Standardization](https://easystats.github.io/datawizard/articles/standardize_data.html) diff --git a/vignettes/selection_syntax.Rmd b/vignettes/selection_syntax.Rmd index 9b501ebd5..3c0953f65 100644 --- a/vignettes/selection_syntax.Rmd +++ b/vignettes/selection_syntax.Rmd @@ -15,8 +15,7 @@ knitr::opts_chunk$set( pkgs <- c( "datawizard", - "dplyr", - "htmltools" + "dplyr" ) if (!all(vapply(pkgs, requireNamespace, quietly = TRUE, FUN.VALUE = logical(1L)))) { @@ -27,18 +26,10 @@ if (!all(vapply(pkgs, requireNamespace, quietly = TRUE, FUN.VALUE = logical(1L)) ```{r load, echo=FALSE, message=FALSE} library(datawizard) library(dplyr) -library(htmltools) set.seed(123) iris <- iris[sample(nrow(iris), 10), ] row.names(iris) <- NULL - -row <- function(...) { - div( - class = "custom_note", - ... - ) -} ``` ```{css, echo=FALSE} @@ -127,18 +118,26 @@ data_select(iris, contains("pal", "ec")) data_select(iris, regex("^Sep|ies")) ``` -```{r echo=FALSE} -row("Note: these functions are not exported by `datawizard` but are detected and -applied internally. This means that they won't be detected by autocompletion -when we write them.") -``` -```{r echo=FALSE} -row("Note #2: because these functions are not exported, they will not create -conflicts with the ones that come from the `tidyverse` and that have the same name. -So we can still use `dplyr` and its friends, it won't change anything for selection -in `datawizard` functions!") -``` + + + +
+

+ Note: these functions are not exported by `datawizard` but are detected and + applied internally. This means that they won't be detected by autocompletion + when we write them. +

+
+ +
+

+ Note #2: because these functions are not exported, they will not create + conflicts with the ones that come from the `tidyverse` and that have the same + name. Therefore, we can still use `dplyr` and its friends; it won't change + anything for selection in `datawizard` functions! +

+
# Excluding variables diff --git a/vignettes/tidyverse_translation.Rmd b/vignettes/tidyverse_translation.Rmd index b03402468..ae4b339b3 100644 --- a/vignettes/tidyverse_translation.Rmd +++ b/vignettes/tidyverse_translation.Rmd @@ -1,6 +1,6 @@ --- title: "Coming from 'tidyverse'" -output: +output: rmarkdown::html_vignette: toc: true vignette: > @@ -9,7 +9,7 @@ vignette: > %\VignetteEngine{knitr::rmarkdown} --- -```{r message=FALSE, warning=FALSE, include=FALSE, eval = TRUE} +```{r setup, message=FALSE, warning=FALSE, include=FALSE, eval = TRUE} library(knitr) options(knitr.kable.NA = "") knitr::opts_chunk$set( @@ -21,57 +21,71 @@ knitr::opts_chunk$set( pkgs <- c( "dplyr", - "datawizard", "tidyr" ) +all_deps_available <- all(vapply(pkgs, requireNamespace, quietly = TRUE, FUN.VALUE = logical(1L))) -# since we explicitely put eval = TRUE for some chunks, we can't rely on -# knitr::opts_chunk$set(eval = FALSE) at the beginning of the script. So we make -# a logical that is FALSE only if deps are not installed (cf easystats/easystats#317) -evaluate_chunk <- TRUE - -if (!all(vapply(pkgs, requireNamespace, quietly = TRUE, FUN.VALUE = logical(1L)))) { - evaluate_chunk <- FALSE +if (all_deps_available) { + library(datawizard) + library(dplyr) + library(tidyr) } + +# Since we explicitly put `eval = TRUE` for some chunks, we can't rely on +# `knitr::opts_chunk$set(eval = FALSE)` at the beginning of the script. +# Therefore, we introduce a logical that is `FALSE` only if all suggested +# dependencies are not installed (cf easystats/easystats#317) +evaluate_chunk <- all_deps_available && getRversion() >= "4.1.0" ``` This vignette can be referred to by citing the following: Patil et al., (2022). datawizard: An R Package for Easy Data Preparation and Statistical Transformations. 
*Journal of Open Source Software*, *7*(78), 4684, https://doi.org/10.21105/joss.04684 -```{css, echo=FALSE, eval = evaluate_chunk} +```{css, echo=FALSE, eval = TRUE} .datawizard, .datawizard > .sourceCode { background-color: #e6e6ff; } .tidyverse, .tidyverse > .sourceCode { background-color: #d9f2e5; } +.custom_note { + border-left: solid 5px hsl(220, 100%, 30%); + background-color: hsl(220, 100%, 95%); + padding: 5px; + margin-bottom: 10px +} ``` # Introduction -`{datawizard}` package aims to make basic data wrangling easier than +`{datawizard}` package aims to make basic data wrangling easier than with base R. The data wrangling workflow it supports is similar to the one supported by the tidyverse package combination of `{dplyr}` and `{tidyr}`. However, one of its main features is that it has a very few dependencies: `{stats}` and `{utils}` -(included in base R) and `{insight}`, which is the core package of the _easystats_ -ecosystem. This package grew organically to simultaneously satisfy the +(included in base R) and `{insight}`, which is the core package of the _easystats_ +ecosystem. This package grew organically to simultaneously satisfy the "0 non-base hard dependency" principle of _easystats_ and the data wrangling needs -of the constituent packages in this ecosystem. - -One drawback of this genesis is that not all features of the `{tidyverse}` -packages are supported since only features that were necessary for _easystats_ -ecosystem have been implemented. Some of these missing features (such as `summarize` -or the pipe operator `%>%`) are made available in other dependency-free packages, -such as [`{poorman}`](https://github.com/nathaneastwood/poorman/). It is also -important to note that `{datawizard}` was designed to avoid namespace collisions +of the constituent packages in this ecosystem. It is also +important to note that `{datawizard}` was designed to avoid namespace collisions with `{tidyverse}` packages. 
-In this article, we will see how to go through basic data wrangling steps with -`{datawizard}`. We will also compare it to the `{tidyverse}` syntax for achieving the same. +In this article, we will see how to go through basic data wrangling steps with +`{datawizard}`. We will also compare it to the `{tidyverse}` syntax for achieving the same. This way, if you decide to make the switch, you can easily find the translations here. This vignette is largely inspired from `{dplyr}`'s [Getting started vignette](https://dplyr.tidyverse.org/articles/dplyr.html). + + + +
+

+ Note: In this vignette, we use the native pipe-operator, `|>`, which was + introduced in R 4.1. Users of R version 3.6 or 4.0 should replace the native + pipe with magrittr's one (`%>%`) so that examples work. +

+
+ ```{r, eval = evaluate_chunk} library(dplyr) library(tidyr) @@ -83,23 +97,23 @@ efc <- head(efc) # Workhorses -Before we look at their *tidyverse* equivalents, we can first have a look at +Before we look at their *tidyverse* equivalents, we can first have a look at `{datawizard}`'s key functions for data wrangling: -| Function | Operation | -| :---------------- | :------------------------------------------------ | -| `data_filter()` | [to select only certain observations](#filtering) | -| `data_select()` | [to select only a few variables](#selecting) | -| `data_modify()` | [to create variables or modify existing ones](#modifying) | -| `data_arrange()` | [to sort observations](#sorting) | -| `data_extract()` | [to extract a single variable](#extracting) | -| `data_rename()` | [to rename variables](#renaming) | -| `data_relocate()` | [to reorder a data frame](#relocating) | -| `data_to_long()` | [to convert data from wide to long](#reshaping) | -| `data_to_wide()` | [to convert data from long to wide](#reshaping) | -| `data_join()` | [to join two data frames](#joining) | -| `data_unite()` | [to concatenate several columns into a single one](#uniting) | -| `data_separate()` | [to separate a single column into multiple columns](#separating) | +| Function | Operation | +| :---------------- | :--------------------------------------------------------------- | +| `data_filter()` | [to select only certain observations](#filtering) | +| `data_select()` | [to select only a few variables](#selecting) | +| `data_modify()` | [to create variables or modify existing ones](#modifying) | +| `data_arrange()` | [to sort observations](#sorting) | +| `data_extract()` | [to extract a single variable](#extracting) | +| `data_rename()` | [to rename variables](#renaming) | +| `data_relocate()` | [to reorder a data frame](#relocating) | +| `data_to_long()` | [to convert data from wide to long](#reshaping) | +| `data_to_wide()` | [to convert data from long to wide](#reshaping) | +| 
`data_join()` | [to join two data frames](#joining) | +| `data_unite()` | [to concatenate several columns into a single one](#uniting) | +| `data_separate()` | [to separate a single column into multiple columns](#separating) | Note that there are a few functions in `{datawizard}` that have no strict equivalent in `{dplyr}` or `{tidyr}` (e.g `data_rotate()`), and so we won't discuss them in @@ -113,7 +127,7 @@ Before we look at them individually, let's first have a look at the summary tabl | :---------------- | :------------------------------------------------------------------ | | `data_filter()` | `dplyr::filter()`, `dplyr::slice()` | | `data_select()` | `dplyr::select()` | -| `data_modify()` | `dplyr::mutate()` | +| `data_modify()` | `dplyr::mutate()` | | `data_arrange()` | `dplyr::arrange()` | | `data_extract()` | `dplyr::pull()` | | `data_rename()` | `dplyr::rename()` | @@ -123,8 +137,8 @@ Before we look at them individually, let's first have a look at the summary tabl | `data_join()` | `dplyr::inner_join()`, `dplyr::left_join()`, `dplyr::right_join()`, | | | `dplyr::full_join()`, `dplyr::anti_join()`, `dplyr::semi_join()` | | `data_peek()` | `dplyr::glimpse()` | -| `data_unite()` | `tidyr::unite()` | -| `data_separate()` | `tidyr::separate()` | +| `data_unite()` | `tidyr::unite()` | +| `data_separate()` | `tidyr::separate()` | ## Filtering {#filtering} @@ -136,14 +150,14 @@ Before we look at them individually, let's first have a look at the summary tabl ```{r filter, class.source = "datawizard"} # ---------- datawizard ----------- -starwars %>% +starwars |> data_filter( skin_color == "light", eye_color == "brown" ) # or -starwars %>% +starwars |> data_filter( skin_color == "light" & eye_color == "brown" @@ -155,7 +169,7 @@ starwars %>% ```{r, class.source = "tidyverse"} # ---------- tidyverse ----------- -starwars %>% +starwars |> filter( skin_color == "light", eye_color == "brown" @@ -176,9 +190,9 @@ starwars <- head(starwars) ## Selecting {#selecting} 
-`data_select()` is the equivalent of `dplyr::select()`. +`data_select()` is the equivalent of `dplyr::select()`. The main difference between these two functions is that `data_select()` uses two -arguments (`select` and `exclude`) and requires quoted column names if we want to +arguments (`select` and `exclude`) and requires quoted column names if we want to select several variables, while `dplyr::select()` accepts any unquoted column names. :::: {style="display: grid; grid-template-columns: 50% 50%; grid-column-gap: 10px;"} @@ -187,7 +201,7 @@ select several variables, while `dplyr::select()` accepts any unquoted column na ```{r select1, class.source = "datawizard"} # ---------- datawizard ----------- -starwars %>% +starwars |> data_select(select = c("hair_color", "skin_color", "eye_color")) ``` ::: @@ -196,7 +210,7 @@ starwars %>% ```{r, class.source = "tidyverse"} # ---------- tidyverse ----------- -starwars %>% +starwars |> select(hair_color, skin_color, eye_color) ``` ::: @@ -212,7 +226,7 @@ starwars %>% ```{r select2, class.source = "datawizard"} # ---------- datawizard ----------- -starwars %>% +starwars |> data_select(select = -ends_with("color")) ``` ::: @@ -221,7 +235,7 @@ starwars %>% ```{r, class.source = "tidyverse"} # ---------- tidyverse ----------- -starwars %>% +starwars |> select(-ends_with("color")) ``` ::: @@ -240,7 +254,7 @@ here and quoting them won't work. Should we comment on that? 
--> ```{r select3, class.source = "datawizard"} # ---------- datawizard ----------- -starwars %>% +starwars |> data_select(select = -(hair_color:eye_color)) ``` ::: @@ -249,7 +263,7 @@ starwars %>% ```{r, class.source = "tidyverse"} # ---------- tidyverse ----------- -starwars %>% +starwars |> select(!(hair_color:eye_color)) ``` ::: @@ -266,7 +280,7 @@ starwars %>% ```{r select4, class.source = "datawizard"} # ---------- datawizard ----------- -starwars %>% +starwars |> data_select(exclude = regex("color$")) ``` ::: @@ -275,7 +289,7 @@ starwars %>% ```{r, class.source = "tidyverse"} # ---------- tidyverse ----------- -starwars %>% +starwars |> select(-contains("color$")) ``` ::: @@ -292,7 +306,7 @@ starwars %>% ```{r select5, class.source = "datawizard"} # ---------- datawizard ----------- -starwars %>% +starwars |> data_select(select = is.numeric) ``` ::: @@ -301,7 +315,7 @@ starwars %>% ```{r, class.source = "tidyverse"} # ---------- tidyverse ----------- -starwars %>% +starwars |> select(where(is.numeric)) ``` ::: @@ -316,8 +330,8 @@ You can find a list of all the select helpers with `?data_select`. ## Modifying {#modifying} -`data_modify()` is a wrapper around `base::transform()` but has several additional -benefits: +`data_modify()` is a wrapper around `base::transform()` but has several additional +benefits: * it allows us to use newly created variables in the following expressions; * it works with grouped data; @@ -325,8 +339,8 @@ benefits: * it accepts expressions as character vectors so that it is easy to program with it -This last point is also the main difference between `data_modify()` and -`dplyr::mutate()`. +This last point is also the main difference between `data_modify()` and +`dplyr::mutate()`. 
:::: {style="display: grid; grid-template-columns: 50% 50%; grid-column-gap: 10px;"} @@ -334,7 +348,7 @@ This last point is also the main difference between `data_modify()` and ```{r modify1, class.source = "datawizard"} # ---------- datawizard ----------- -efc %>% +efc |> data_modify( c12hour_c = center(c12hour), c12hour_z = c12hour_c / sd(c12hour, na.rm = TRUE), @@ -347,7 +361,7 @@ efc %>% ```{r, class.source = "tidyverse"} # ---------- tidyverse ----------- -efc %>% +efc |> mutate( c12hour_c = center(c12hour), c12hour_z = c12hour_c / sd(c12hour, na.rm = TRUE), @@ -400,7 +414,7 @@ such as `starts_with()` in `data_arrange()`. :::{} ```{r arrange1, class.source = "datawizard"} # ---------- datawizard ----------- -starwars %>% +starwars |> data_arrange(c("hair_color", "height")) ``` ::: @@ -409,7 +423,7 @@ starwars %>% ```{r, class.source = "tidyverse"} # ---------- tidyverse ----------- -starwars %>% +starwars |> arrange(hair_color, height) ``` ::: @@ -419,14 +433,14 @@ starwars %>% ```{r arrange1, eval = evaluate_chunk, echo = FALSE} ``` -You can also sort variables in descending order by putting a `"-"` in front of +You can also sort variables in descending order by putting a `"-"` in front of their name, like below: :::: {style="display: grid; grid-template-columns: 50% 50%; grid-column-gap: 10px;"} :::{} ```{r arrange2, class.source = "datawizard"} # ---------- datawizard ----------- -starwars %>% +starwars |> data_arrange(c("-hair_color", "-height")) ``` ::: @@ -435,7 +449,7 @@ starwars %>% ```{r, class.source = "tidyverse"} # ---------- tidyverse ----------- -starwars %>% +starwars |> arrange(desc(hair_color), -height) ``` ::: @@ -448,15 +462,15 @@ starwars %>% ## Extracting {#extracting} -Although we mostly work on data frames, it is sometimes useful to extract a single -column as a vector. This can be done with `data_extract()`, which reproduces the +Although we mostly work on data frames, it is sometimes useful to extract a single +column as a vector. 
This can be done with `data_extract()`, which reproduces the behavior of `dplyr::pull()`: :::: {style="display: grid; grid-template-columns: 50% 50%; grid-column-gap: 10px;"} :::{} ```{r extract1, class.source = "datawizard"} # ---------- datawizard ----------- -starwars %>% +starwars |> data_extract(gender) ``` ::: @@ -465,7 +479,7 @@ starwars %>% ```{r, class.source = "tidyverse"} # ---------- tidyverse ----------- -starwars %>% +starwars |> pull(gender) ``` ::: @@ -479,7 +493,7 @@ We can also specify several variables in `select`. In this case, `data_extract() is equivalent to `data_select()`: ```{r eval = evaluate_chunk} -starwars %>% +starwars |> data_extract(select = contains("color")) ``` @@ -488,9 +502,9 @@ starwars %>% ## Renaming {#renaming} -`data_rename()` is the equivalent of `dplyr::rename()` but the syntax between the +`data_rename()` is the equivalent of `dplyr::rename()` but the syntax between the two is different. While `dplyr::rename()` takes new-old pairs of column -names, `data_rename()` requires a vector of column names to rename, and then +names, `data_rename()` requires a vector of column names to rename, and then a vector of new names for these columns that must be of the same length. :::: {style="display: grid; grid-template-columns: 50% 50%; grid-column-gap: 10px;"} @@ -499,7 +513,7 @@ a vector of new names for these columns that must be of the same length. ```{r rename1, class.source = "datawizard"} # ---------- datawizard ----------- -starwars %>% +starwars |> data_rename( pattern = c("sex", "hair_color"), replacement = c("Sex", "Hair Color") @@ -511,7 +525,7 @@ starwars %>% ```{r, class.source = "tidyverse"} # ---------- tidyverse ----------- -starwars %>% +starwars |> rename( Sex = sex, "Hair Color" = hair_color @@ -524,14 +538,14 @@ starwars %>% ```{r rename1, eval = evaluate_chunk, echo = FALSE} ``` -The way `data_rename()` is designed makes it easy to apply the same modifications -to a vector of column names. 
For example, we can remove underscores and use +The way `data_rename()` is designed makes it easy to apply the same modifications +to a vector of column names. For example, we can remove underscores and use TitleCase with the following code: ```{r rename2} to_rename <- names(starwars) -starwars %>% +starwars |> data_rename( pattern = to_rename, replacement = tools::toTitleCase(gsub("_", " ", to_rename, fixed = TRUE)) @@ -541,16 +555,16 @@ starwars %>% ```{r rename2, eval = evaluate_chunk, echo = FALSE} ``` -It is also possible to add a prefix or a suffix to all or a subset of variables -with `data_addprefix()` and `data_addsuffix()`. The argument `select` accepts +It is also possible to add a prefix or a suffix to all or a subset of variables +with `data_addprefix()` and `data_addsuffix()`. The argument `select` accepts all select helpers that we saw above with `data_select()`: ```{r rename3} -starwars %>% +starwars |> data_addprefix( pattern = "OLD.", select = contains("color") - ) %>% + ) |> data_addsuffix( pattern = ".NEW", select = -contains("color") @@ -566,7 +580,7 @@ Sometimes, we want to relocate one or a small subset of columns in the dataset. Rather than typing many names in `data_select()`, we can use `data_relocate()`, which is the equivalent of `dplyr::relocate()`. Just like `data_select()`, we can specify a list of variables we want to relocate with `select` and `exclude`. -Then, the arguments `before` and `after`^[Note that we use `before` and `after` +Then, the arguments `before` and `after`^[Note that we use `before` and `after` whereas `dplyr::relocate()` uses `.before` and `.after`.] 
specify where the selected columns should be relocated: @@ -576,32 +590,32 @@ be relocated: ```{r relocate1, class.source = "datawizard"} # ---------- datawizard ----------- -starwars %>% +starwars |> data_relocate(sex:homeworld, before = "height") ``` ::: - + ::: {} ```{r, class.source = "tidyverse"} # ---------- tidyverse ----------- -starwars %>% +starwars |> relocate(sex:homeworld, .before = height) ``` ::: - + :::: ```{r relocate1, eval = evaluate_chunk, echo = FALSE} ``` In addition to column names, `before` and `after` accept column indices. Finally, -one can use `before = -1` to relocate the selected columns just before the last +one can use `before = -1` to relocate the selected columns just before the last column, or `after = -1` to relocate them after the last column. ```{r eval = evaluate_chunk} # ---------- datawizard ----------- -starwars %>% +starwars |> data_relocate(sex:homeworld, after = -1) ``` @@ -611,10 +625,10 @@ starwars %>% ### Longer Reshaping data from wide to long or from long to wide format can be done with -`data_to_long()` and `data_to_wide()`. These functions were designed to match -`tidyr::pivot_longer()` and `tidyr::pivot_wider()` arguments, so that the only -thing to do is to change the function name. However, not all of -`tidyr::pivot_longer()` and `tidyr::pivot_wider()` features are available yet. +`data_to_long()` and `data_to_wide()`. These functions were designed to match +`tidyr::pivot_longer()` and `tidyr::pivot_wider()` arguments, so that the only +thing to do is to change the function name. However, not all of +`tidyr::pivot_longer()` and `tidyr::pivot_wider()` features are available yet. We will use the `relig_income` dataset, as in the [`{tidyr}` vignette](https://tidyr.tidyverse.org/articles/pivot.html). @@ -623,11 +637,11 @@ relig_income ``` -We would like to reshape this dataset to have 3 columns: religion, count, and -income. The column "religion" doesn't need to change, so we exclude it with -`-religion`. 
Then, each remaining column corresponds to an income category. -Therefore, we want to move all these column names to a single column called -"income". Finally, the values corresponding to each of these columns will be +We would like to reshape this dataset to have 3 columns: religion, count, and +income. The column "religion" doesn't need to change, so we exclude it with +`-religion`. Then, each remaining column corresponds to an income category. +Therefore, we want to move all these column names to a single column called +"income". Finally, the values corresponding to each of these columns will be reshaped to be in a single new column, called "count". :::: {style="display: grid; grid-template-columns: 50% 50%; grid-column-gap: 10px;"} @@ -636,7 +650,7 @@ reshaped to be in a single new column, called "count". ```{r pivot1, class.source = "datawizard"} # ---------- datawizard ----------- -relig_income %>% +relig_income |> data_to_long( -religion, names_to = "income", @@ -649,7 +663,7 @@ relig_income %>% ```{r, class.source = "tidyverse"} # ---------- tidyverse ----------- -relig_income %>% +relig_income |> pivot_longer( !religion, names_to = "income", @@ -676,7 +690,7 @@ billboard ```{r pivot2, class.source = "datawizard"} # ---------- datawizard ----------- -billboard %>% +billboard |> data_to_long( cols = starts_with("wk"), names_to = "week", @@ -690,7 +704,7 @@ billboard %>% ```{r, class.source = "tidyverse"} # ---------- tidyverse ----------- -billboard %>% +billboard |> pivot_longer( cols = starts_with("wk"), names_to = "week", @@ -721,7 +735,7 @@ fish_encounters ```{r pivot3, class.source = "datawizard"} # ---------- datawizard ----------- -fish_encounters %>% +fish_encounters |> data_to_wide( names_from = "station", values_from = "seen", @@ -734,7 +748,7 @@ fish_encounters %>% ```{r, class.source = "tidyverse"} # ---------- tidyverse ----------- -fish_encounters %>% +fish_encounters |> pivot_wider( names_from = station, values_from = seen, @@ -754,12 +768,12 
@@ fish_encounters %>% -In `{datawizard}`, joining datasets is done with `data_join()` (or its alias -`data_merge()`). Contrary to `{dplyr}`, this unique function takes care of all +In `{datawizard}`, joining datasets is done with `data_join()` (or its alias +`data_merge()`). Contrary to `{dplyr}`, this unique function takes care of all types of join, which are then specified inside the function with the argument `join` (by default, `join = "left"`). -Below, we show how to perform the four most common joins: full, left, right and +Below, we show how to perform the four most common joins: full, left, right and inner. We will use the datasets `band_members`and `band_instruments` provided by `{dplyr}`: :::: {style="display: grid; grid-template-columns: 50% 50%; grid-column-gap: 10px;"} @@ -789,7 +803,7 @@ band_instruments ```{r join1, class.source = "datawizard"} # ---------- datawizard ----------- -band_members %>% +band_members |> data_join(band_instruments, join = "full") ``` ::: @@ -798,7 +812,7 @@ band_members %>% ```{r, class.source = "tidyverse"} # ---------- tidyverse ----------- -band_members %>% +band_members |> full_join(band_instruments) ``` ::: @@ -818,7 +832,7 @@ band_members %>% ```{r join2, class.source = "datawizard"} # ---------- datawizard ----------- -band_members %>% +band_members |> data_join(band_instruments, join = "left") ``` ::: @@ -827,7 +841,7 @@ band_members %>% ```{r, class.source = "tidyverse"} # ---------- tidyverse ----------- -band_members %>% +band_members |> left_join(band_instruments) ``` ::: @@ -844,7 +858,7 @@ band_members %>% ```{r join3, class.source = "datawizard"} # ---------- datawizard ----------- -band_members %>% +band_members |> data_join(band_instruments, join = "right") ``` ::: @@ -853,7 +867,7 @@ band_members %>% ```{r, class.source = "tidyverse"} # ---------- tidyverse ----------- -band_members %>% +band_members |> right_join(band_instruments) ``` ::: @@ -873,7 +887,7 @@ band_members %>% ```{r join4, class.source = 
"datawizard"} # ---------- datawizard ----------- -band_members %>% +band_members |> data_join(band_instruments, join = "inner") ``` ::: @@ -882,7 +896,7 @@ band_members %>% ```{r, class.source = "tidyverse"} # ---------- tidyverse ----------- -band_members %>% +band_members |> inner_join(band_instruments) ``` ::: @@ -916,7 +930,7 @@ test ```{r unite1, class.source = "datawizard"} # ---------- datawizard ----------- -test %>% +test |> data_unite( new_column = "date", select = c("year", "month", "day"), @@ -924,12 +938,12 @@ test %>% ) ``` ::: - + ::: {} ```{r, class.source = "tidyverse"} # ---------- tidyverse ----------- -test %>% +test |> unite( col = "date", year, month, day, @@ -937,7 +951,7 @@ test %>% ) ``` ::: - + :::: ```{r unite1, eval = evaluate_chunk, echo = FALSE} @@ -949,7 +963,7 @@ test %>% ```{r unite2, class.source = "datawizard"} # ---------- datawizard ----------- -test %>% +test |> data_unite( new_column = "date", select = c("year", "month", "day"), @@ -958,12 +972,12 @@ test %>% ) ``` ::: - + ::: {} ```{r, class.source = "tidyverse"} # ---------- tidyverse ----------- -test %>% +test |> unite( col = "date", year, month, day, @@ -972,7 +986,7 @@ test %>% ) ``` ::: - + :::: ```{r unite2, eval = evaluate_chunk, echo = FALSE} @@ -999,26 +1013,26 @@ test ```{r separate1, class.source = "datawizard"} # ---------- datawizard ----------- -test %>% +test |> data_separate( select = "date_arrival", new_columns = c("Year", "Month", "Day") ) ``` ::: - + ::: {} ```{r, class.source = "tidyverse"} # ---------- tidyverse ----------- -test %>% +test |> separate( date_arrival, into = c("Year", "Month", "Day") ) ``` ::: - + :::: ```{r separate1, eval = evaluate_chunk, echo = FALSE} @@ -1028,7 +1042,7 @@ test %>% Unlike `tidyr::separate()`, you can separate multiple columns in one step with `data_separate()`. 
```{r eval = evaluate_chunk} -test %>% +test |> data_separate( new_columns = list( date_arrival = c("Arr_Year", "Arr_Month", "Arr_Day"), @@ -1040,9 +1054,9 @@ test %>% # Other useful functions -`{datawizard}` contains other functions that are not necessarily included in -`{dplyr}` or `{tidyr}` or do not directly modify the data. Some of them are -inspired from the package `janitor`. +`{datawizard}` contains other functions that are not necessarily included in +`{dplyr}` or `{tidyr}` or do not directly modify the data. Some of them are +inspired from the package `janitor`. ## Work with rownames @@ -1053,12 +1067,12 @@ We can convert a column in rownames and move rownames to a new column with mtcars <- head(mtcars) mtcars -mtcars2 <- mtcars %>% +mtcars2 <- mtcars |> rownames_as_column(var = "model") mtcars2 -mtcars2 %>% +mtcars2 |> column_as_rownames(var = "model") ``` @@ -1068,7 +1082,7 @@ mtcars2 %>% The main difference is when we use it with grouped data. While `tibble::rowid_to_column()` uses one distinct rowid for every row in the dataset, `rowid_as_column()` creates one id for every row *in each group*. Therefore, two rows in different groups -can have the same row id. +can have the same row id. This means that `rowid_as_column()` is closer to using `n()` in `mutate()`, like the following: @@ -1081,16 +1095,16 @@ test <- data.frame( ) test -test %>% - data_group(group) %>% +test |> + data_group(group) |> tibble::rowid_to_column() -test %>% - data_group(group) %>% +test |> + data_group(group) |> rowid_as_column() -test %>% - data_group(group) %>% +test |> + data_group(group) |> mutate(id = seq_len(n())) ``` @@ -1107,11 +1121,11 @@ x <- data.frame( X_2 = c(NA, "Title2", 4:6) ) x -x2 <- x %>% +x2 <- x |> row_to_colnames(row = 2) x2 -x2 %>% +x2 |> colnames_to_row() ```