diff --git a/.dev/test-value_at.R b/.dev/test-value_at.R
new file mode 100644
index 000000000..3c8272101
--- /dev/null
+++ b/.dev/test-value_at.R
@@ -0,0 +1,12 @@
+test_that("value_at", {
+  data(efc, package = "datawizard")
+  expect_equal(value_at(efc$e42dep, 5), 4, ignore_attr = TRUE)
+  expect_equal(value_at(efc$c12hour, 4), NA_real_, ignore_attr = TRUE)
+  expect_equal(value_at(efc$c12hour, 4, remove_na = TRUE), 168, ignore_attr = TRUE)
+  expect_equal(value_at(efc$c12hour, 5:7), efc$c12hour[5:7], ignore_attr = TRUE)
+  expect_equal(value_at(efc$e42dep, 123456, default = 55), 55, ignore_attr = TRUE)
+  expect_null(value_at(efc$e42dep, 123456))
+  expect_null(value_at(efc$e42dep, NULL))
+  expect_error(value_at(efc$e42dep, NA), regex = "`position` can't")
+  expect_error(value_at(efc$e42dep, c(3, NA)), regex = "`position` can't")
+})
diff --git a/.dev/value_at.R b/.dev/value_at.R
new file mode 100644
index 000000000..cdadc9dc6
--- /dev/null
+++ b/.dev/value_at.R
@@ -0,0 +1,52 @@
+#' @title Find the value(s) at a specific position in a variable
+#' @name value_at
+#'
+#' @description This function can be used to extract one or more values at a
+#' specific position in a variable.
+#'
+#' @param x A vector or factor.
+#' @param position An integer or a vector of integers, indicating the position(s)
+#' of the value(s) to be returned. Negative values are counted from the end of
+#' the vector. If `NA`, an error is thrown.
+#' @param remove_na Logical, if `TRUE`, missing values are removed before
+#' computing the position. If `FALSE`, missing values are included in the
+#' computation.
+#' @param default The value to be returned if the position is out of range.
+#'
+#' @seealso `data_summary()` to use `value_at()` inside a `data_summary()` call.
+#'
+#' @return A vector with the value(s) at the specified position(s).
+#'
+#' @examples
+#' data(mtcars)
+#' # 5th value
+#' value_at(mtcars$mpg, 5)
+#' # last value
+#' value_at(mtcars$mpg, -1)
+#' # out of range, return default
+#' value_at(mtcars$mpg, 150)
+#' # return 2nd and fifth value
+#' value_at(mtcars$mpg, c(2, 5))
+#' @export
+value_at <- function(x, position = 1, default = NULL, remove_na = FALSE) {
+  if (remove_na) {
+    x <- x[!is.na(x)]
+  }
+  n <- length(x)
+  unlist(lapply(position, .values_at, x = x, n = n, default = default), use.names = FALSE)
+}
+
+# helper ----
+
+.values_at <- function(x, position, n, default) {
+  if (is.na(position)) {
+    insight::format_error("`position` can't be `NA`.")
+  }
+  if (position < 0L) {
+    position <- position + n + 1
+  }
+  if (position <= 0 || position > n) {
+    return(default)
+  }
+  x[position]
+}
diff --git a/DESCRIPTION b/DESCRIPTION
index 048d15065..eeee80702 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: datawizard
 Title: Easy Data Wrangling and Statistical Transformations
-Version: 0.9.1.4
+Version: 0.9.1.5
 Authors@R: c(
     person("Indrajeet", "Patil", , "patilindrajeet.science@gmail.com", role = "aut",
            comment = c(ORCID = "0000-0003-1995-6531", Twitter = "@patilindrajeets")),
diff --git a/NAMESPACE b/NAMESPACE
index e89863807..d10d1884b 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -52,6 +52,10 @@ S3method(data_modify,data.frame)
 S3method(data_modify,default)
 S3method(data_modify,grouped_df)
 S3method(data_peek,data.frame)
+S3method(data_summary,data.frame)
+S3method(data_summary,default)
+S3method(data_summary,grouped_df)
+S3method(data_summary,matrix)
 S3method(data_tabulate,data.frame)
 S3method(data_tabulate,default)
 S3method(data_tabulate,grouped_df)
@@ -90,6 +94,7 @@ S3method(plot,visualisation_recipe)
 S3method(print,data_codebook)
 S3method(print,data_seek)
 S3method(print,dw_data_peek)
+S3method(print,dw_data_summary)
 S3method(print,dw_data_tabulate)
 S3method(print,dw_data_tabulates)
 S3method(print,dw_data_xtabulate)
@@ -249,6 +254,7 @@ export(data_rotate)
 export(data_seek)
 export(data_select)
 export(data_separate)
+export(data_summary)
 export(data_tabulate)
 export(data_to_long)
 export(data_to_wide)
diff --git a/NEWS.md b/NEWS.md
index 968c9d417..8b6ba88da 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,9 @@
 # datawizard 0.9.2
 
+NEW FUNCTIONS
+
+* `data_summary()`, to compute summary statistics of (grouped) data frames.
+
 CHANGES
 
 * `data_modify()` gets three new arguments, `.at`, `.if` and `.modify`, to modify
diff --git a/R/adjust.R b/R/adjust.R
index 01b1b5a2c..821821864 100644
--- a/R/adjust.R
+++ b/R/adjust.R
@@ -124,11 +124,11 @@ adjust <- function(data,
       predictors[predictors == predictors_num] <- paste0("s(", predictors_num, ")")
     }
     formula_predictors <- paste(c("1", predictors), collapse = " + ")
-    formula <- paste(var, "~", formula_predictors)
+    model_formula <- paste(var, "~", formula_predictors)
 
     x <- .model_adjust_for(
       data = data[unique(c(var, effect, facs))],
-      formula,
+      model_formula = model_formula,
       multilevel = multilevel,
       additive = additive,
       bayesian = bayesian,
@@ -148,7 +148,7 @@ data_adjust <- adjust
 
 #' @keywords internal
 .model_adjust_for <- function(data,
-                              formula,
+                              model_formula,
                               multilevel = FALSE,
                               additive = FALSE,
                               bayesian = FALSE,
@@ -159,32 +159,28 @@ data_adjust <- adjust
     # Bayesian
     if (bayesian) {
       insight::check_if_installed("rstanarm")
-      model <- rstanarm::stan_gamm4(stats::as.formula(formula), random = formula_random, data = data, refresh = 0)
+      model <- rstanarm::stan_gamm4(stats::as.formula(model_formula), random = formula_random, data = data, refresh = 0)
       # Frequentist
     } else {
       insight::check_if_installed("gamm4")
-      model <- gamm4::gamm4(stats::as.formula(formula), random = formula_random, data = data)
+      model <- gamm4::gamm4(stats::as.formula(model_formula), random = formula_random, data = data)
     }
 
     # Linear -------------------------
-  } else {
+  } else if (bayesian) {
     # Bayesian
-    if (bayesian) {
-      insight::check_if_installed("rstanarm")
-      if (multilevel) {
-        model <- rstanarm::stan_lmer(paste(formula, formula_random), data = data, refresh = 0)
-      } else {
-        model <- rstanarm::stan_glm(formula, data = data, refresh = 0)
-      }
-      # Frequentist
+    insight::check_if_installed("rstanarm")
+    if (multilevel) {
+      model <- rstanarm::stan_lmer(paste(model_formula, formula_random), data = data, refresh = 0)
     } else {
-      if (multilevel) {
-        insight::check_if_installed("lme4")
-        model <- lme4::lmer(paste(formula, formula_random), data = data)
-      } else {
-        model <- stats::lm(formula, data = data)
-      }
+      model <- rstanarm::stan_glm(model_formula, data = data, refresh = 0)
     }
+  } else if (multilevel) {
+    # Frequentist
+    insight::check_if_installed("lme4")
+    model <- lme4::lmer(paste(model_formula, formula_random), data = data)
+  } else {
+    model <- stats::lm(model_formula, data = data)
   }
 
   adjusted <- insight::get_residuals(model)
diff --git a/R/assign_labels.R b/R/assign_labels.R
index 9390101ad..bd35513bf 100644
--- a/R/assign_labels.R
+++ b/R/assign_labels.R
@@ -79,7 +79,7 @@ assign_labels.numeric <- function(x, variable = NULL, values = NULL, ...) {
       attr(x, "label") <- variable
     } else {
       insight::format_error(
-        "Variable labels (argument `variable`) must be provided as a single character string, e.g. `variable = \"mylabel\"`."
+        "Variable labels (argument `variable`) must be provided as a single character string, e.g. `variable = \"mylabel\"`." # nolint
       )
     }
   }
@@ -88,13 +88,13 @@ assign_labels.numeric <- function(x, variable = NULL, values = NULL, ...) {
   if (!is.null(values)) {
     # extract unique values
     unique_values <- as.vector(sort(stats::na.omit(unique(x))))
-    labels <- NULL
+    value_labels <- NULL
 
     # do we have a names vector for "values"?
     # else check if number of labels and values match
     if (is.null(names(values))) {
       if (length(values) == length(unique_values)) {
-        labels <- stats::setNames(unique_values, values)
+        value_labels <- stats::setNames(unique_values, values)
       } else {
         insight::format_error(
           "Cannot add labels. Number of unique values and number of value labels are not equal.",
@@ -114,11 +114,11 @@ assign_labels.numeric <- function(x, variable = NULL, values = NULL, ...) {
 
       if (length(values)) {
         # we need to switch names and values
-        labels <- stats::setNames(coerce_to_numeric(names(values)), values)
+        value_labels <- stats::setNames(coerce_to_numeric(names(values)), values)
       }
     }
 
-    attr(x, "labels") <- labels
+    attr(x, "labels") <- value_labels
   }
 
   x
diff --git a/R/categorize.R b/R/categorize.R
index d440069d8..341d0c0c9 100644
--- a/R/categorize.R
+++ b/R/categorize.R
@@ -145,28 +145,8 @@ categorize.numeric <- function(x,
                                labels = NULL,
                                verbose = TRUE,
                                ...) {
-  # check arguments
-  if (is.character(split)) {
-    split <- match.arg(
-      split,
-      choices = c(
-        "median", "mean", "quantile", "equal_length", "equal_range",
-        "equal", "equal_distance", "range", "distance"
-      )
-    )
-  }
-
-  if (is.character(split) && split %in% c("quantile", "equal_length") && is.null(n_groups)) {
-    insight::format_error(
-      "Recoding based on quantiles or equal-sized groups requires the `n_groups` argument to be specified."
-    )
-  }
-
-  if (is.character(split) && split == "equal_range" && is.null(n_groups) && is.null(range)) {
-    insight::format_error(
-      "Recoding into groups with equal range requires either the `range` or `n_groups` argument to be specified."
-    )
-  }
+  # sanity check
+  split <- .sanitize_split_arg(split, n_groups, range)
 
   # handle aliases
   if (identical(split, "equal_length")) split <- "length"
@@ -221,28 +201,7 @@ categorize.numeric <- function(x,
   original_x[!is.na(original_x)] <- out
 
   # turn into factor?
-  if (!is.null(labels)) {
-    if (length(labels) == length(unique(out))) {
-      original_x <- as.factor(original_x)
-      levels(original_x) <- labels
-    } else if (length(labels) == 1 && labels %in% c("mean", "median")) {
-      original_x <- as.factor(original_x)
-      no_na_x <- original_x[!is.na(original_x)]
-      if (labels == "mean") {
-        labels <- stats::aggregate(x, list(no_na_x), FUN = mean, na.rm = TRUE)$x
-      } else {
-        labels <- stats::aggregate(x, list(no_na_x), FUN = stats::median, na.rm = TRUE)$x
-      }
-      levels(original_x) <- insight::format_value(labels, ...)
-    } else if (isTRUE(verbose)) {
-      insight::format_warning(
-        "Argument `labels` and levels of the recoded variable are not of the same length.",
-        "Variable will not be converted to factor."
-      )
-    }
-  }
-
-  original_x
+  .original_x_to_factor(original_x, x, labels, out, verbose, ...)
 }
 
 
@@ -283,15 +242,15 @@ categorize.data.frame <- function(x,
   # create the new variables and updates "select", so new variables are processed
   if (!isFALSE(append)) {
     # process arguments
-    args <- .process_append(
+    my_args <- .process_append(
       x,
       select,
       append,
       append_suffix = "_r"
     )
     # update processed arguments
-    x <- args$x
-    select <- args$select
+    x <- my_args$x
+    select <- my_args$select
   }
 
   x[select] <- lapply(
@@ -342,15 +301,15 @@ categorize.grouped_df <- function(x,
   # create the new variables and updates "select", so new variables are processed
   if (!isFALSE(append)) {
     # process arguments
-    args <- .process_append(
+    my_args <- .process_append(
       x,
       select,
       append,
       append_suffix = "_r"
     )
     # update processed arguments
-    x <- args$x
-    select <- args$select
+    x <- my_args$x
+    select <- my_args$select
   }
 
   x <- as.data.frame(x)
@@ -387,3 +346,56 @@ categorize.grouped_df <- function(x,
   }
   seq(lowest, max(x), by = range)
 }
+
+
+.sanitize_split_arg <- function(split, n_groups, range) {
+  # check arguments
+  if (is.character(split)) {
+    split <- match.arg(
+      split,
+      choices = c(
+        "median", "mean", "quantile", "equal_length", "equal_range",
+        "equal", "equal_distance", "range", "distance"
+      )
+    )
+  }
+
+  if (is.character(split) && split %in% c("quantile", "equal_length") && is.null(n_groups)) {
+    insight::format_error(
+      "Recoding based on quantiles or equal-sized groups requires the `n_groups` argument to be specified."
+    )
+  }
+
+  if (is.character(split) && split == "equal_range" && is.null(n_groups) && is.null(range)) {
+    insight::format_error(
+      "Recoding into groups with equal range requires either the `range` or `n_groups` argument to be specified."
+    )
+  }
+
+  split
+}
+
+
+.original_x_to_factor <- function(original_x, x, labels, out, verbose, ...) {
+  if (!is.null(labels)) {
+    if (length(labels) == length(unique(out))) {
+      original_x <- as.factor(original_x)
+      levels(original_x) <- labels
+    } else if (length(labels) == 1 && labels %in% c("mean", "median")) {
+      original_x <- as.factor(original_x)
+      no_na_x <- original_x[!is.na(original_x)]
+      if (labels == "mean") {
+        labels <- stats::aggregate(x, list(no_na_x), FUN = mean, na.rm = TRUE)$x
+      } else {
+        labels <- stats::aggregate(x, list(no_na_x), FUN = stats::median, na.rm = TRUE)$x
+      }
+      levels(original_x) <- insight::format_value(labels, ...)
+    } else if (isTRUE(verbose)) {
+      insight::format_warning(
+        "Argument `labels` and levels of the recoded variable are not of the same length.",
+        "Variable will not be converted to factor."
+      )
+    }
+  }
+  original_x
+}
diff --git a/R/data_modify.R b/R/data_modify.R
index 40a186736..c9b9d035a 100644
--- a/R/data_modify.R
+++ b/R/data_modify.R
@@ -177,69 +177,14 @@ data_modify.data.frame <- function(data, ..., .if = NULL, .at = NULL, .modify =
     }
 
     for (i in seq_along(dots)) {
-      # iterate expressions for new variables
-      symbol <- dots[[i]]
-
-      # expression is given as character string in a variable, but named, e.g.
-      # a <- "2 * Sepal.Width"
-      # data_modify(iris, double_SepWidth = a)
-      # we reconstruct the symbol as if it were provided as literal expression.
-      # However, we need to check that we don't have a character vector,
-      # like: data_modify(iris, new_var = "a")
-      # this one should be recycled instead.
-      if (!is.character(symbol)) {
-        eval_symbol <- .dynEval(symbol, ifnotfound = NULL)
-        if (is.character(eval_symbol)) {
-          symbol <- try(str2lang(paste0(names(dots)[i], " = ", eval_symbol)), silent = TRUE)
-          # we may have the edge-case of having a function that returns a character
-          # vector, like "new_var = sample(letters[1:3])". In this case, "eval_symbol"
-          # is of type character, but no symbol, thus str2lang() above creates a
-          # wrong pattern. We then take "eval_symbol" as character input.
-          if (inherits(symbol, "try-error")) {
-            symbol <- str2lang(paste0(
-              names(dots)[i],
-              " = c(", paste0("\"", eval_symbol, "\"", collapse = ","), ")"
-            ))
-          }
-        }
-      }
-
-      # finally, we can evaluate expression and get values for new variables
-      new_variable <- try(with(data, eval(symbol)), silent = TRUE)
-
-      # successful, or any errors, like misspelled variable name?
-      if (inherits(new_variable, "try-error")) {
-        # in which step did error happen?
-        step_number <- switch(as.character(i),
-          "1" = "the first expression",
-          "2" = "the second expression",
-          "3" = "the third expression",
-          paste("expression", i)
-        )
-        step_msg <- paste0("There was an error in ", step_number, ".")
-        # try to find out which variable was the cause for the error
-        error_msg <- attributes(new_variable)$condition$message
-        if (grepl("object '(.*)' not found", error_msg)) {
-          error_var <- gsub("object '(.*)' not found", "\\1", error_msg)
-          insight::format_error(
-            paste0(step_msg, " Variable \"", error_var, "\" was not found in the dataset or in the environment."),
-            .misspelled_string(colnames(data), error_var, "Possibly misspelled or not yet defined?")
-          )
-        } else {
-          insight::format_error(paste0(
-            step_msg, " ", insight::format_capitalize(error_msg),
-            ". Possibly misspelled or not yet defined?"
-          ))
-        }
-      }
-
+      # create new variable
+      new_variable <- .get_new_dots_variable(dots, i, data)
       # give informative error when new variable doesn't match number of rows
       if (!is.null(new_variable) && length(new_variable) != nrow(data) && (nrow(data) %% length(new_variable)) != 0) {
         insight::format_error(
           "New variable has not the same length as the other variables in the data frame and cannot be recycled."
         )
       }
-
       data[[names(dots)[i]]] <- new_variable
     }
   }
@@ -375,3 +320,70 @@ data_modify.grouped_df <- function(data, ..., .if = NULL, .at = NULL, .modify =
 
   data
 }
+
+.get_new_dots_variable <- function(dots, i, data) {
+  # iterate expressions for new variables
+  symbol <- dots[[i]]
+
+  # expression is given as character string in a variable, but named, e.g.
+  # a <- "2 * Sepal.Width"
+  # data_modify(iris, double_SepWidth = a)
+  # we reconstruct the symbol as if it were provided as literal expression.
+  # However, we need to check that we don't have a character vector,
+  # like: data_modify(iris, new_var = "a")
+  # this one should be recycled instead.
+  if (!is.character(symbol)) {
+    eval_symbol <- .dynEval(symbol, ifnotfound = NULL)
+    if (is.character(eval_symbol)) {
+      symbol <- try(str2lang(paste0(names(dots)[i], " = ", eval_symbol)), silent = TRUE)
+      # we may have the edge-case of having a function that returns a character
+      # vector, like "new_var = sample(letters[1:3])". In this case, "eval_symbol"
+      # is of type character, but no symbol, thus str2lang() above creates a
+      # wrong pattern. We then take "eval_symbol" as character input.
+      if (inherits(symbol, "try-error")) {
+        symbol <- str2lang(paste0(
+          names(dots)[i],
+          " = c(", paste0("\"", eval_symbol, "\"", collapse = ","), ")"
+        ))
+      }
+    }
+  }
+
+  # finally, we can evaluate expression and get values for new variables
+  symbol_string <- insight::safe_deparse(symbol)
+  if (!is.null(symbol_string) && all(symbol_string == "n()")) {
+    # "special" functions
+    new_variable <- nrow(data)
+  } else {
+    # default evaluation of expression
+    new_variable <- try(with(data, eval(symbol)), silent = TRUE)
+  }
+
+  # successful, or any errors, like misspelled variable name?
+  if (inherits(new_variable, "try-error")) {
+    # in which step did error happen?
+    step_number <- switch(as.character(i),
+      "1" = "the first expression",
+      "2" = "the second expression",
+      "3" = "the third expression",
+      paste("expression", i)
+    )
+    step_msg <- paste0("There was an error in ", step_number, ".")
+    # try to find out which variable was the cause for the error
+    error_msg <- attributes(new_variable)$condition$message
+    if (grepl("object '(.*)' not found", error_msg)) {
+      error_var <- gsub("object '(.*)' not found", "\\1", error_msg)
+      insight::format_error(
+        paste0(step_msg, " Variable \"", error_var, "\" was not found in the dataset or in the environment."),
+        .misspelled_string(colnames(data), error_var, "Possibly misspelled or not yet defined?")
+      )
+    } else {
+      insight::format_error(paste0(
+        step_msg, " ", insight::format_capitalize(error_msg),
+        ". Possibly misspelled or not yet defined?"
+      ))
+    }
+  }
+
+  new_variable
+}
diff --git a/R/data_summary.R b/R/data_summary.R
new file mode 100644
index 000000000..8d15f8483
--- /dev/null
+++ b/R/data_summary.R
@@ -0,0 +1,219 @@
+#' @title Summarize data
+#' @name data_summary
+#'
+#' @description This function can be used to compute summary statistics for a
+#' data frame or a matrix.
+#'
+#' @param x A (grouped) data frame.
+#' @param by Optional character string, indicating the name of a variable in `x`.
+#' If supplied, the data will be split by this variable and summary statistics
+#' will be computed for each group.
+#' @param include_na Logical. If `TRUE`, missing values are included as a level
+#' in the grouping variable. If `FALSE`, missing values are omitted from the
+#' grouping variable.
+#' @param ... One or more named expressions that define the new variable name
+#' and the function to compute the summary statistic. Example:
+#' `mean_sepal_width = mean(Sepal.Width)`. The expression can also be provided
+#' as a character string, e.g. `"mean_sepal_width = mean(Sepal.Width)"`. The
+#' summary function `n()` can be used to count the number of observations.
+#'
+#' @return A data frame with the requested summary statistics.
+#'
+#' @examples
+#' data(iris)
+#' data_summary(iris, MW = mean(Sepal.Width), SD = sd(Sepal.Width))
+#' data_summary(
+#'   iris,
+#'   MW = mean(Sepal.Width),
+#'   SD = sd(Sepal.Width),
+#'   by = "Species"
+#' )
+#'
+#' # same as
+#' d <- data_group(iris, "Species")
+#' data_summary(d, MW = mean(Sepal.Width), SD = sd(Sepal.Width))
+#'
+#' # multiple groups
+#' data(mtcars)
+#' data_summary(mtcars, MW = mean(mpg), SD = sd(mpg), by = c("am", "gear"))
+#'
+#' # expressions can also be supplied as character strings
+#' data_summary(mtcars, "MW = mean(mpg)", "SD = sd(mpg)", by = c("am", "gear"))
+#'
+#' # count observations within groups
+#' data_summary(mtcars, observations = n(), by = c("am", "gear"))
+#'
+#' # first and last observations of "mpg" within groups
+#' data_summary(
+#'   mtcars,
+#'   first = mpg[1],
+#'   last = mpg[length(mpg)],
+#'   by = c("am", "gear")
+#' )
+#' @export
+data_summary <- function(x, ...) {
+  UseMethod("data_summary")
+}
+
+
+#' @export
+data_summary.matrix <- function(x, ..., by = NULL, include_na = TRUE) {
+  data_summary(as.data.frame(x), ..., by = by, include_na = include_na)
+}
+
+
+#' @export
+data_summary.default <- function(x, ...) {
+  insight::format_error("`data_summary()` only works for (grouped) data frames and matrices.")
+}
+
+
+#' @rdname data_summary
+#' @export
+data_summary.data.frame <- function(x, ..., by = NULL, include_na = TRUE) {
+  dots <- eval(substitute(alist(...)))
+
+  # do we have any expression at all?
+  if (length(dots) == 0) {
+    insight::format_error("No expressions for calculating summary statistics provided.")
+  }
+
+  if (is.null(by)) {
+    # when we have no grouping, just compute a one-row summary
+    summarise <- .process_datasummary_dots(dots, x)
+    out <- data.frame(summarise)
+    colnames(out) <- vapply(summarise, names, character(1))
+  } else {
+    # sanity check - is "by" a character string?
+    if (!is.character(by)) {
+      insight::format_error("Argument `by` must be a character string indicating the name of variables in the data.")
+    }
+    # is "by" in the data?
+    if (!all(by %in% colnames(x))) {
+      by_not_found <- by[!by %in% colnames(x)]
+      insight::format_error(
+        paste0(
+          "Variable",
+          ifelse(length(by_not_found) > 1, "s ", " "),
+          text_concatenate(by_not_found, enclose = "\""),
+          " not found in the data."
+        ),
+        .misspelled_string(colnames(x), by_not_found, "Possibly misspelled?")
+      )
+    }
+    # split data, add NA levels, if requested
+    l <- lapply(x[by], function(i) {
+      if (include_na && anyNA(i)) {
+        addNA(i)
+      } else {
+        i
+      }
+    })
+    split_data <- split(x, l, drop = TRUE)
+    out <- lapply(split_data, function(s) {
+      # no data for combination? Return NULL
+      if (nrow(s) == 0) {
+        return(NULL)
+      }
+      # summarize data
+      summarise <- .process_datasummary_dots(dots, s)
+      # coerce to data frame
+      summarised_data <- data.frame(summarise)
+      # bind grouping-variables and values
+      summarised_data <- cbind(s[1, by], summarised_data)
+      # make sure we have proper column names
+      colnames(summarised_data) <- c(by, vapply(summarise, names, character(1)))
+      summarised_data
+    })
+    out <- do.call(rbind, out)
+  }
+  # sort data
+  out <- data_arrange(out, select = by)
+  # data attributes
+  class(out) <- c("dw_data_summary", "data.frame")
+  rownames(out) <- NULL
+  out
+}
+
+
+#' @export
+data_summary.grouped_df <- function(x, ..., by = NULL, include_na = TRUE) {
+  # extract group variables
+  grps <- attr(x, "groups", exact = TRUE)
+  group_variables <- data_remove(grps, ".rows")
+  # if "by" is not supplied, use group variables
+  if (is.null(by)) {
+    by <- colnames(group_variables)
+  }
+  # remove information specific to grouped df's
+  attr(x, "groups") <- NULL
+  class(x) <- "data.frame"
+  data_summary(x, ..., by = by, include_na = include_na)
+}
+
+
+# helper -----------------------------------------------------------------------
+
+.process_datasummary_dots <- function(dots, data) {
+  out <- NULL
+  if (length(dots)) {
+    # we check for character vector of expressions, in which case
+    # "dots" should be unnamed
+    if (is.null(names(dots))) {
+      # if we have multiple strings, concatenate them to a character vector
+      # and put it into a list...
+      if (length(dots) > 1) {
+        if (all(vapply(dots, is.character, logical(1)))) {
+          dots <- list(unlist(dots))
+        } else {
+          insight::format_error("You cannot mix string and literal representation of expressions.")
+        }
+      }
+      # expression is given as character string, e.g.
+      # a <- "mean_sepwid = mean(Sepal.Width)"
+      # data_summary(iris, a, by = "Species")
+      # or as character vector, e.g.
+      # data_summary(iris, c("var_a = mean(Sepal.Width)", "var_b = sd(Sepal.Width)"))
+      character_symbol <- tryCatch(.dynEval(dots[[1]]), error = function(e) NULL)
+      # do we have a character vector? Then we can proceed
+      if (is.character(character_symbol)) {
+        dots <- lapply(character_symbol, function(s) {
+          # turn value from character vector into expression
+          str2lang(.dynEval(s))
+        })
+        names(dots) <- vapply(dots, function(n) insight::safe_deparse(n[[2]]), character(1))
+      }
+    }
+
+    out <- lapply(seq_along(dots), function(i) {
+      new_variable <- .get_new_dots_variable(dots, i, data)
+      stats::setNames(new_variable, names(dots)[i])
+    })
+  }
+
+  # check for correct length of output - must be a single value!
+  if (any(lengths(out) != 1)) {
+    insight::format_error(
+      paste0(
+        "Each expression must return a single value. Following expression",
+        ifelse(sum(lengths(out) != 1) > 1, "s", " "),
+        " returned more than one value: ",
+        text_concatenate(vapply(dots[lengths(out) != 1], insight::safe_deparse, character(1)), enclose = "\"")
+      )
+    )
+  }
+
+  out
+}
+
+
+# methods ----------------------------------------------------------------------
+
+#' @export
+print.dw_data_summary <- function(x, ...) {
+  if (nrow(x) == 0) {
+    cat("No matches found.\n")
+  } else {
+    cat(insight::export_table(x, missing = "<NA>", ...))
+  }
+}
diff --git a/_pkgdown.yaml b/_pkgdown.yaml
index 2adc0768c..0062b8a5a 100644
--- a/_pkgdown.yaml
+++ b/_pkgdown.yaml
@@ -61,6 +61,7 @@ reference:
       Functions to compute statistical summaries of data properties and distributions
     contents:
       - data_codebook
+      - data_summary
       - data_tabulate
       - data_peek
       - data_seek
diff --git a/man/data_summary.Rd b/man/data_summary.Rd
new file mode 100644
index 000000000..ccbf4c524
--- /dev/null
+++ b/man/data_summary.Rd
@@ -0,0 +1,67 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data_summary.R
+\name{data_summary}
+\alias{data_summary}
+\alias{data_summary.data.frame}
+\title{Summarize data}
+\usage{
+data_summary(x, ...)
+
+\method{data_summary}{data.frame}(x, ..., by = NULL, include_na = TRUE)
+}
+\arguments{
+\item{x}{A (grouped) data frame.}
+
+\item{...}{One or more named expressions that define the new variable name
+and the function to compute the summary statistic. Example:
+\code{mean_sepal_width = mean(Sepal.Width)}. The expression can also be provided
+as a character string, e.g. \code{"mean_sepal_width = mean(Sepal.Width)"}. The
+summary function \code{n()} can be used to count the number of observations.}
+
+\item{by}{Optional character string, indicating the name of a variable in \code{x}.
+If supplied, the data will be split by this variable and summary statistics
+will be computed for each group.}
+
+\item{include_na}{Logical. If \code{TRUE}, missing values are included as a level
+in the grouping variable. If \code{FALSE}, missing values are omitted from the
+grouping variable.}
+}
+\value{
+A data frame with the requested summary statistics.
+}
+\description{
+This function can be used to compute summary statistics for a
+data frame or a matrix.
+}
+\examples{
+data(iris)
+data_summary(iris, MW = mean(Sepal.Width), SD = sd(Sepal.Width))
+data_summary(
+  iris,
+  MW = mean(Sepal.Width),
+  SD = sd(Sepal.Width),
+  by = "Species"
+)
+
+# same as
+d <- data_group(iris, "Species")
+data_summary(d, MW = mean(Sepal.Width), SD = sd(Sepal.Width))
+
+# multiple groups
+data(mtcars)
+data_summary(mtcars, MW = mean(mpg), SD = sd(mpg), by = c("am", "gear"))
+
+# expressions can also be supplied as character strings
+data_summary(mtcars, "MW = mean(mpg)", "SD = sd(mpg)", by = c("am", "gear"))
+
+# count observations within groups
+data_summary(mtcars, observations = n(), by = c("am", "gear"))
+
+# first and last observations of "mpg" within groups
+data_summary(
+  mtcars,
+  first = mpg[1],
+  last = mpg[length(mpg)],
+  by = c("am", "gear")
+)
+}
diff --git a/tests/testthat/_snaps/data_summary.md b/tests/testthat/_snaps/data_summary.md
new file mode 100644
index 000000000..44b7f3f4c
--- /dev/null
+++ b/tests/testthat/_snaps/data_summary.md
@@ -0,0 +1,55 @@
+# data_summary, print
+
+    Code
+      print(out)
+    Output
+      am | gear |    MW |   SD
+      ------------------------
+       0 |    3 | 16.11 | 3.37
+       0 |    4 | 21.05 | 3.07
+       1 |    4 | 26.27 | 5.41
+       1 |    5 | 21.38 | 6.66
+
+# data_summary, with NA
+
+    Code
+      print(out)
+    Output
+      c172code |    MW
+      ----------------
+             1 | 87.12
+             2 | 94.05
+             3 | 75.00
+          <NA> | 47.80
+
+---
+
+    Code
+      print(out)
+    Output
+      c172code |    MW
+      ----------------
+             1 | 87.12
+             2 | 94.05
+             3 | 75.00
+
+---
+
+    Code
+      print(out)
+    Output
+      e42dep | c172code |     MW
+      --------------------------
+      1      |        2 |  17.00
+      2      |        2 |  34.25
+      3      |        1 |  39.50
+      3      |        2 |  52.44
+      3      |        3 |  52.00
+      3      |     <NA> |  84.00
+      4      |        1 | 134.75
+      4      |        2 | 119.26
+      4      |        3 |  88.80
+      4      |     <NA> |  43.29
+      <NA>   |        2 |   <NA>
+      <NA>   |     <NA> |   7.00
+
diff --git a/tests/testthat/test-data_reorder.R b/tests/testthat/test-data_reorder.R
index 34a8f5420..463f36040 100644
--- a/tests/testthat/test-data_reorder.R
+++ b/tests/testthat/test-data_reorder.R
@@ -1,11 +1,11 @@
 test_that("data_reorder works as expected", {
-  expect_equal(
-    names(data_reorder(iris, c("Species", "Sepal.Length"))),
+  expect_named(
+    data_reorder(iris, c("Species", "Sepal.Length")),
     c("Species", "Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")
   )
 
-  expect_warning(expect_equal(
-    names(data_reorder(iris, c("Species", "dupa"))),
+  expect_warning(expect_named(
+    data_reorder(iris, c("Species", "dupa")),
     c("Species", "Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")
   ))
 })
@@ -24,5 +24,6 @@ test_that("data_reorder preserves attributes", {
   a2 <- attributes(out2)
 
   # attributes may not be in the same order
-  expect_true(all(names(a1) %in% names(a2)) && length(a1) == length(a2))
+  expect_true(all(names(a1) %in% names(a2)))
+  expect_length(a1, length(a2))
 })
diff --git a/tests/testthat/test-data_summary.R b/tests/testthat/test-data_summary.R
new file mode 100644
index 000000000..746d4c51a
--- /dev/null
+++ b/tests/testthat/test-data_summary.R
@@ -0,0 +1,230 @@
+test_that("data_summary, single row summary", {
+  data(iris)
+  out <- data_summary(iris, MW = mean(Sepal.Width), SD = sd(Sepal.Width))
+  expect_equal(out$MW, mean(iris$Sepal.Width), tolerance = 1e-4)
+  expect_equal(out$SD, sd(iris$Sepal.Width), tolerance = 1e-4)
+})
+
+
+test_that("data_summary, single row summary, string expression", {
+  data(iris)
+  out <- data_summary(iris, "MW = mean(Sepal.Width)", "SD = sd(Sepal.Width)")
+  expect_equal(out$MW, mean(iris$Sepal.Width), tolerance = 1e-4)
+  expect_equal(out$SD, sd(iris$Sepal.Width), tolerance = 1e-4)
+})
+
+
+test_that("data_summary, summary for groups", {
+  data(iris)
+  out <- data_summary(iris, MW = mean(Sepal.Width), SD = sd(Sepal.Width), by = "Species")
+  expect_equal(
+    out$MW,
+    aggregate(iris["Sepal.Width"], list(iris$Species), mean)$Sepal.Width,
+    tolerance = 1e-4
+  )
+  expect_equal(
+    out$SD,
+    aggregate(iris["Sepal.Width"], list(iris$Species), sd)$Sepal.Width,
+    tolerance = 1e-4
+  )
+})
+
+
+test_that("data_summary, summary for groups, string expression", {
+  data(iris)
+  out <- data_summary(
+    iris,
+    "MW = mean(Sepal.Width)",
+    "SD = sd(Sepal.Width)",
+    by = "Species"
+  )
+  expect_equal(
+    out$MW,
+    aggregate(iris["Sepal.Width"], list(iris$Species), mean)$Sepal.Width,
+    tolerance = 1e-4
+  )
+  expect_equal(
+    out$SD,
+    aggregate(iris["Sepal.Width"], list(iris$Species), sd)$Sepal.Width,
+    tolerance = 1e-4
+  )
+})
+
+
+test_that("data_summary, grouped data frames", {
+  data(iris)
+  d <- data_group(iris, "Species")
+  out <- data_summary(d, MW = mean(Sepal.Width), SD = sd(Sepal.Width))
+  expect_equal(
+    out$MW,
+    aggregate(iris["Sepal.Width"], list(iris$Species), mean)$Sepal.Width,
+    tolerance = 1e-4
+  )
+  expect_equal(
+    out$SD,
+    aggregate(iris["Sepal.Width"], list(iris$Species), sd)$Sepal.Width,
+    tolerance = 1e-4
+  )
+  # "by" overrides groups
+  data(mtcars)
+  d <- data_group(mtcars, "gear")
+  out <- data_summary(d, MW = mean(mpg), SD = sd(mpg), by = "am")
+  expect_identical(
+    out$MW,
+    aggregate(mtcars["mpg"], list(mtcars$am), mean)$mpg
+  )
+})
+
+
+test_that("data_summary, summary for multiple groups", {
+  data(mtcars)
+  out <- data_summary(mtcars, MW = mean(mpg), SD = sd(mpg), by = c("am", "gear"))
+  expect_equal(
+    out$MW,
+    aggregate(mtcars["mpg"], list(mtcars$am, mtcars$gear), mean)$mpg,
+    tolerance = 1e-4
+  )
+  expect_equal(
+    out$SD,
+    aggregate(mtcars["mpg"], list(mtcars$am, mtcars$gear), sd)$mpg,
+    tolerance = 1e-4
+  )
+  x <- data_group(mtcars, c("am", "gear"))
+  out <- data_summary(x, MW = mean(mpg), SD = sd(mpg))
+  expect_equal(
+    out$MW,
+    aggregate(mtcars["mpg"], list(mtcars$am, mtcars$gear), mean)$mpg,
+    tolerance = 1e-4
+  )
+  expect_equal(
+    out$SD,
+    aggregate(mtcars["mpg"], list(mtcars$am, mtcars$gear), sd)$mpg,
+    tolerance = 1e-4
+  )
+})
+
+
+test_that("data_summary, errors", {
+  data(iris)
+  data(mtcars)
+  # "by" must be character
+  expect_error(
+    data_summary(iris, MW = mean(Sepal.Width), SD = sd(Sepal.Width), by = 5),
+    regex = "Argument `by` must be a character string"
+  )
+  # "by" must be in data
+  expect_error(
+    data_summary(iris, MW = mean(Sepal.Width), SD = sd(Sepal.Width), by = "Speceis"),
+    regex = "Variable \"Speceis\" not"
+  )
+  # by for multiple variables
+  expect_error(
+    data_summary(mtcars, MW = mean(mpg), SD = sd(mpg), by = c("bam", "gear")),
+    regex = "Variable \"bam\" not"
+  )
+  expect_error(
+    data_summary(mtcars, MW = mean(mpg), SD = sd(mpg), by = c("bam", "geas")),
+    regex = "Did you mean one of \"am\" or \"gear\"?"
+  )
+  # not a data frame
+  expect_error(
+    data_summary(iris$Sepal.Width, MW = mean(Sepal.Width), SD = sd(Sepal.Width)),
+    regex = "only works for"
+  )
+  # no expressions
+  expect_error(
+    data_summary(iris, by = "Species"),
+    regex = "No expressions for calculating"
+  )
+  # wrong expression
+  expect_error(
+    data_summary(mtcars, mw = mesn(mpg), by = "am"),
+    regex = "There was an error"
+  )
+  # wrong variable name
+  expect_error(
+    data_summary(mtcars, n = max(mpeg)),
+    regex = "There was an error"
+  )
+  # expression returns more than one value
+  expect_error(
+    data_summary(mtcars, n = unique(mpg), j = c(min(am), max(am)), by = c("am", "gear")),
+    regex = "Each expression must return"
+  )
+})
+
+
+test_that("data_summary, values_at", {
+  data(mtcars)
+  out <- data_summary(mtcars, pos1 = mpg[1], pos_end = mpg[length(mpg)], by = c("am", "gear"))
+  # same as:
+  # dplyr::summarise(mtcars, pos1 = dplyr::first(mpg), pos_end = dplyr::last(mpg), .by = c("am", "gear"))
+  expect_equal(out$pos1, c(21.4, 24.4, 21, 26), tolerance = 1e-3)
+  expect_equal(out$pos_end, c(19.2, 17.8, 21.4, 15), tolerance = 1e-3)
+})
+
+
+test_that("data_summary, print", {
+  data(mtcars)
+  out <- data_summary(mtcars, MW = mean(mpg), SD = sd(mpg), by = c("am", "gear"))
+  expect_snapshot(print(out))
+})
+
+
+test_that("data_summary, with NA", {
+  data(efc, package = "datawizard")
+  out <- data_summary(efc, MW = mean(c12hour, na.rm = TRUE), by = "c172code")
+  expect_snapshot(print(out))
+  out <- data_summary(efc, MW = mean(c12hour, na.rm = TRUE), by = "c172code", include_na = FALSE)
+  expect_snapshot(print(out))
+  # sorting for multiple groups
+  out <- data_summary(efc, MW = mean(c12hour, na.rm = TRUE), by = c("e42dep", "c172code"))
+  expect_snapshot(print(out))
+})
+
+
+test_that("data_summary, inside functions", {
+  foo1 <- function(x, ...) {
+    datawizard::data_summary(x, ..., by = "Species")
+  }
+
+  foo2 <- function(x, by, ...) {
+    datawizard::data_summary(x, ..., by = by)
+  }
+
+  foo3 <- function(x, by) {
+    datawizard::data_summary(x, MW = mean(Sepal.Width), by = by)
+  }
+
+  data(iris)
+  out1 <- foo1(iris, MW = mean(Sepal.Width))
+  out2 <- foo2(iris, by = "Species", MW = mean(Sepal.Width))
+  out3 <- foo3(iris, "Species")
+  expect_equal(out1$MW, out2$MW, tolerance = 1e-4)
+  expect_equal(out1$MW, out3$MW, tolerance = 1e-4)
+})
+
+
+test_that("data_summary, expression as variable", {
+  data(mtcars)
+  a <- "MW = mean(mpg)"
+  b <- "SD = sd(mpg)"
+  out <- data_summary(mtcars, a, by = c("am", "gear"))
+  expect_named(out, c("am", "gear", "MW"))
+  expect_equal(out$MW, aggregate(mtcars["mpg"], list(mtcars$am, mtcars$gear), mean)$mpg, tolerance = 1e-4)
+  expect_error(
+    data_summary(mtcars, a, b, by = c("am", "gear")),
+    regex = "You cannot mix"
+  )
+  out <- data_summary(mtcars, c(a, b), by = c("am", "gear"))
+  expect_named(out, c("am", "gear", "MW", "SD"))
+  expect_equal(out$SD, aggregate(mtcars["mpg"], list(mtcars$am, mtcars$gear), sd)$mpg, tolerance = 1e-4)
+})
+
+
+test_that("data_summary, extra functions", {
+  data(mtcars)
+  # n()
+  out <- data_summary(mtcars, n = n(), by = c("am", "gear"))
+  expect_identical(out$n, c(15L, 4L, 8L, 5L))
+})