diff --git a/NAMESPACE b/NAMESPACE index d10d1884b..006b33153 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -317,6 +317,7 @@ export(to_numeric) export(unnormalize) export(unstandardise) export(unstandardize) +export(value_at) export(visualisation_recipe) export(weighted_mad) export(weighted_mean) diff --git a/NEWS.md b/NEWS.md index 8b6ba88da..9be0338d6 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,8 @@ NEW FUNCTIONS * `data_summary()`, to compute summary statistics of (grouped) data frames. +* `value_at()`, to extract values at a specific position in a vector or factor. + CHANGES * `data_modify()` gets three new arguments, `.at`, `.if` and `.modify`, to modify diff --git a/R/data_summary.R b/R/data_summary.R index 654c20879..b8f6e0ed3 100644 --- a/R/data_summary.R +++ b/R/data_summary.R @@ -178,6 +178,19 @@ data_summary.grouped_df <- function(x, ..., by = NULL, include_na = TRUE) { }) } + # check for correct length of output - must be a single value! + if (any(lengths(out) != 1)) { + insight::format_error( + "Each expression must return a single value. Following expressions returned more than one value:", + paste0( + "`", + vapply(dots[lengths(out) != 1], insight::safe_deparse, character(1)), + "`", + collapse = ", " + ) + ) + } + out } diff --git a/R/value_at.R b/R/value_at.R new file mode 100644 index 000000000..2b923b6ff --- /dev/null +++ b/R/value_at.R @@ -0,0 +1,50 @@ +#' @title Find the value at a specific position in a variable +#' @name value_at +#' +#' @description This function can be used to compute summary statistics for a +#' data frame or a matrix. +#' +#' @param x A vector or factor. +#' @param position An integer or a vector of integers, indicating the position(s) +#' of the value(s) to be returned. Negative values are counted from the end of +#' the vector. If `NA`, an error is thrown. +#' @param remove_na Logical, if `TRUE`, missing values are removed before +#' computing the position. If `FALSE`, missing values are included in the +#' computation. +#' @param default The value to be returned if the position is out of range. +#' +#' @return A vector with the value(s) at the specified position(s). +#' +#' @examples +#' data(mtcars) +#' # 5th value +#' value_at(mtcars$mpg, 5) +#' # last value +#' value_at(mtcars$mpg, -1) +#' # out of range, return default +#' value_at(mtcars$mpg, 150) +#' # return 2nd and fifth value +#' value_at(mtcars$mpg, c(2, 5)) +#' @export +value_at <- function(x, position = 1, default = NULL, remove_na = FALSE) { + if (remove_na) { + x <- x[!is.na(x)] + } + n <- length(x) + unlist(lapply(position, .values_at, x = x, n = n, default = default), use.names = FALSE) +} + +# helper ---- + +.values_at <- function(x, position, n, default) { + if (is.na(position)) { + insight::format_error("`position` can't be `NA`.") + } + if (position < 0L) { + position <- position + n + 1 + } + if (position <= 0 || position > n) { + return(default) + } + x[position] +} diff --git a/man/value_at.Rd b/man/value_at.Rd new file mode 100644 index 000000000..a2b1058ca --- /dev/null +++ b/man/value_at.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/value_at.R +\name{value_at} +\alias{value_at} +\title{Find the value at a specific position in a variable} +\usage{ +value_at(x, position = 1, default = NULL, remove_na = FALSE) +} +\arguments{ +\item{x}{A vector or factor.} + +\item{position}{An integer or a vector of integers, indicating the position(s) +of the value(s) to be returned. Negative values are counted from the end of +the vector. If \code{NA}, an error is thrown.} + +\item{default}{The value to be returned if the position is out of range.} + +\item{remove_na}{Logical, if \code{TRUE}, missing values are removed before +computing the position. If \code{FALSE}, missing values are included in the +computation.} +} +\value{ +A vector with the value(s) at the specified position(s). +} +\description{ +This function can be used to compute summary statistics for a +data frame or a matrix. +} +\examples{ +data(mtcars) +# 5th value +value_at(mtcars$mpg, 5) +# last value +value_at(mtcars$mpg, -1) +# out of range, return default +value_at(mtcars$mpg, 150) +# return 2nd and fifth value +value_at(mtcars$mpg, c(2, 5)) +} diff --git a/tests/testthat/test-data_summary.R b/tests/testthat/test-data_summary.R index 2f7fd3308..88145d959 100644 --- a/tests/testthat/test-data_summary.R +++ b/tests/testthat/test-data_summary.R @@ -141,6 +141,21 @@ test_that("data_summary, errors", { data_summary(mtcars, mw = mesn(mpg), by = "am"), regex = "There was an error" ) + # expression returns more than one value + expect_error( + data_summary(mtcars, n = unique(mpg), j = c(min(am), max(am)), by = c("am", "gear")), + regex = "Each expression must return" + ) +}) + + +test_that("data_summary, values_at", { + data(mtcars) + out <- data_summary(mtcars, pos1 = value_at(mpg), pos_end = value_at(mpg, -1), by = c("am", "gear")) + # same as: + # dplyr::summarise(mtcars, pos1 = dplyr::first(mpg), pos_end = dplyr::last(mpg), .by = c("am", "gear")) + expect_equal(out$pos1, c(21.4, 24.4, 21, 26), tolerance = 1e-3) + expect_equal(out$pos_end, c(19.2, 17.8, 21.4, 15), tolerance = 1e-3) }) diff --git a/tests/testthat/test-value_at.R b/tests/testthat/test-value_at.R new file mode 100644 index 000000000..a15256004 --- /dev/null +++ b/tests/testthat/test-value_at.R @@ -0,0 +1,9 @@ +test_that("value_at", { + data(efc, package = "datawizard") + expect_equal(value_at(efc$e42dep, 5), 4, ignore_attr = TRUE) + expect_equal(value_at(efc$e42dep, 4), NA, ignore_attr = TRUE) + expect_equal(value_at(efc$e42dep, 4, remove_na = TRUE), 4, ignore_attr = TRUE) + expect_equal(value_at(efc$c12hour, 5:7), efc$c12hour[5:7], ignore_attr = TRUE) + expect_equal(value_at(efc$e42dep, 123456, default = 55), 55, ignore_attr = TRUE) + expect_null(value_at(efc$e42dep, 123456)) +})