Commit

Merge branch 'main' into strengejacke/issue441
strengejacke authored Feb 24, 2024
2 parents 1c3e1f5 + 83f9703 commit 185cd10
Showing 67 changed files with 2,497 additions and 651 deletions.
23 changes: 0 additions & 23 deletions .dev/copy_files_to_other_repos.R

This file was deleted.

4 changes: 2 additions & 2 deletions .github/workflows/html-5-check.yaml
@@ -6,8 +6,8 @@ on:
  pull_request:
    branches: [main, master]

name: HTML5 check
name: html-5-check

jobs:
  HTML5-check:
  html-5-check:
    uses: easystats/workflows/.github/workflows/html-5-check.yaml@main
10 changes: 10 additions & 0 deletions .github/workflows/update-to-latest-easystats.yaml
@@ -0,0 +1,10 @@
on:
  schedule:
    # Check for dependency updates once a month
    - cron: "0 0 1 * *"

name: update-to-latest-easystats

jobs:
  update-to-latest-easystats:
    uses: easystats/workflows/.github/workflows/update-to-latest-easystats.yaml@main
1 change: 1 addition & 0 deletions .lintr
@@ -13,6 +13,7 @@ linters: linters_with_defaults(
todo_comment_linter = NULL,
undesirable_function_linter(c("mapply" = NA, "sapply" = NA, "setwd" = NA)),
undesirable_operator_linter = NULL,
if_not_else_linter(exceptions = character(0L)),
unnecessary_concatenation_linter(allow_single_expression = FALSE),
defaults = linters_with_tags(tags = NULL)
)
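
The newly enabled `if_not_else_linter()` comes from the {lintr} package and flags `if`/`else` constructs that start with a negated condition; `exceptions = character(0L)` means no functions are exempt. This is also why an `if (!safe) ... else ...` block is flipped in `R/data_arrange.R` later in this commit. A minimal illustration of the pattern the linter flags (the `safe` variable is made up for the sketch):

safe <- TRUE

# flagged by if_not_else_linter(): negated condition followed by an else branch
if (!safe) stop("not safe") else message("safe")

# preferred: state the positive condition first
if (safe) message("safe") else stop("not safe")
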
9 changes: 5 additions & 4 deletions DESCRIPTION
@@ -1,7 +1,7 @@
Type: Package
Package: datawizard
Title: Easy Data Wrangling and Statistical Transformations
Version: 0.8.0.13
Version: 0.9.1.4
Authors@R: c(
person("Indrajeet", "Patil", , "[email protected]", role = "aut",
comment = c(ORCID = "0000-0003-1995-6531", Twitter = "@patilindrajeets")),
@@ -33,13 +33,14 @@ BugReports: https://github.com/easystats/datawizard/issues
Depends:
R (>= 3.6)
Imports:
insight (>= 0.19.3.2),
insight (>= 0.19.8),
stats,
utils
Suggests:
bayestestR,
boot,
brms,
curl,
data.table,
dplyr (>= 1.0),
effectsize,
@@ -62,7 +63,7 @@ Suggests:
rmarkdown,
rstanarm,
see,
testthat (>= 3.1.6),
testthat (>= 3.2.0),
tibble,
tidyr,
withr
@@ -71,7 +72,7 @@ VignetteBuilder:
Encoding: UTF-8
Language: en-US
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3.9000
RoxygenNote: 7.3.1
Config/testthat/edition: 3
Config/testthat/parallel: true
Config/Needs/website:
9 changes: 9 additions & 0 deletions NAMESPACE
@@ -67,6 +67,7 @@ S3method(describe_distribution,numeric)
S3method(format,data_codebook)
S3method(format,dw_data_peek)
S3method(format,dw_data_tabulate)
S3method(format,dw_data_xtabulate)
S3method(format,dw_groupmeans)
S3method(format,parameters_distribution)
S3method(kurtosis,data.frame)
@@ -91,6 +92,8 @@ S3method(print,data_seek)
S3method(print,dw_data_peek)
S3method(print,dw_data_tabulate)
S3method(print,dw_data_tabulates)
S3method(print,dw_data_xtabulate)
S3method(print,dw_data_xtabulates)
S3method(print,dw_groupmeans)
S3method(print,dw_groupmeans_list)
S3method(print,dw_transformer)
@@ -102,10 +105,13 @@ S3method(print_html,data_codebook)
S3method(print_html,dw_data_peek)
S3method(print_html,dw_data_tabulate)
S3method(print_html,dw_data_tabulates)
S3method(print_html,dw_data_xtabulate)
S3method(print_html,dw_data_xtabulates)
S3method(print_md,data_codebook)
S3method(print_md,dw_data_peek)
S3method(print_md,dw_data_tabulate)
S3method(print_md,dw_data_tabulates)
S3method(print_md,dw_data_xtabulate)
S3method(ranktransform,data.frame)
S3method(ranktransform,factor)
S3method(ranktransform,grouped_df)
@@ -167,7 +173,9 @@ S3method(to_factor,Date)
S3method(to_factor,character)
S3method(to_factor,data.frame)
S3method(to_factor,default)
S3method(to_factor,double)
S3method(to_factor,factor)
S3method(to_factor,haven_labelled)
S3method(to_factor,logical)
S3method(to_factor,numeric)
S3method(to_numeric,Date)
@@ -179,6 +187,7 @@ S3method(to_numeric,data.frame)
S3method(to_numeric,default)
S3method(to_numeric,double)
S3method(to_numeric,factor)
S3method(to_numeric,haven_labelled)
S3method(to_numeric,logical)
S3method(to_numeric,numeric)
S3method(unnormalize,data.frame)
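
The newly registered S3 methods `to_factor.haven_labelled` and `to_numeric.haven_labelled` correspond to the `haven_labelled` support described in the NEWS below. A brief sketch of what the new dispatch enables; it assumes the {haven} package (not part of this diff) and made-up labels:

library(haven)
library(datawizard)

# a labelled vector, as created by haven when importing SPSS or Stata data
x <- labelled(c(1, 2, 1, 3), labels = c(low = 1, mid = 2, high = 3))

to_factor(x)   # now dispatches to to_factor.haven_labelled()
to_numeric(x)  # now dispatches to to_numeric.haven_labelled()
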
36 changes: 35 additions & 1 deletion NEWS.md
@@ -1,4 +1,34 @@
# datawizard (devel)
# datawizard 0.9.2

CHANGES

* `data_modify()` gets three new arguments, `.at`, `.if`, and `.modify`, to
modify variables at specific positions or based on logical conditions.

* `data_tabulate()` was revised and gets several new arguments: `weights`, to
compute weighted frequency tables; `include_na`, to include or omit missing
values from the table; and `by`, to compute crosstabs (#479, #481).

# datawizard 0.9.1

CHANGES

* `rescale()` gains `multiply` and `add` arguments, to expand ranges by a given
factor or value.

* `to_factor()` and `to_numeric()` now support class `haven_labelled`.

BUG FIXES

* `to_numeric()` now correctly deals with reversed factor levels when
`preserve_levels = TRUE`.

* Fixed an issue where `to_numeric()` reversed the order of value labels when
`dummy_factors = FALSE`.

* `convert_to_na()` now preserves attributes for factors when `drop_levels = TRUE`.

# datawizard 0.9.0

NEW FUNCTIONS

@@ -41,6 +71,10 @@ BUG FIXES
* Fixed issues in `data_write()` when writing labelled data into SPSS format
and vectors were of a different type than their value labels.

* Fixed issues in `data_write()` when writing labelled data into SPSS format
for character vectors that had variable labels but no value labels.

* Fixed an issue in `recode_into()` where a possibly wrong case number was
printed in the warning when several recode patterns matched the same case.

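
To make the new `data_tabulate()`, `data_modify()`, and `rescale()` arguments from the NEWS entries above more concrete, here is a short usage sketch. The argument names (`weights`, `include_na`, `by`, `.at`, `.if`, `.modify`, `multiply`, `add`) come from the changelog; the example data and the exact call patterns (passing `by` and `weights` as vectors, `select` as a column name) are assumptions, not copied from the package documentation:

library(datawizard)

# made-up example data
d <- data.frame(
  sex    = c("m", "f", "f", "m", "f"),
  smoker = c("yes", "no", "no", NA, "yes"),
  wgt    = c(1.2, 0.8, 1.0, 1.1, 0.9),
  score1 = c(2, 4, 3, 5, 1),
  score2 = c(1, 3, 2, 4, 5)
)

# weighted frequency table, keeping missing values in the table
data_tabulate(d, select = "smoker", weights = d$wgt, include_na = TRUE)

# crosstab: frequencies of 'smoker' within levels of 'sex'
data_tabulate(d, select = "smoker", by = d$sex)

# modify variables at given positions, or all variables matching a condition
data_modify(d, .at = c("score1", "score2"), .modify = as.factor)
data_modify(d, .if = is.numeric, .modify = round)

# expand a range by a factor or a fixed value (new in 0.9.1)
rescale(d$score1, multiply = 1.1)
rescale(d$score1, add = 1)
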
10 changes: 5 additions & 5 deletions R/categorize.R
@@ -192,11 +192,11 @@ categorize.numeric <- function(x,
breaks <- split
} else {
breaks <- switch(split,
"median" = stats::median(x),
"mean" = mean(x),
"length" = n_groups,
"quantile" = stats::quantile(x, probs = seq_len(n_groups) / n_groups),
"range" = .equal_range(x, range, n_groups, lowest),
median = stats::median(x),
mean = mean(x),
length = n_groups,
quantile = stats::quantile(x, probs = seq_len(n_groups) / n_groups),
range = .equal_range(x, range, n_groups, lowest),
NULL
)
}
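
The `split` values handled by this `switch()` (only the quoting style changed here) map to the user-facing `categorize()` options. A minimal usage sketch with made-up data; `split` and `n_groups` appear in the code above, but the exact calls are an illustration, not taken from the package documentation:

library(datawizard)

x <- c(2, 5, 8, 1, 9, 4, 7, 3)

# split at the median (two groups)
categorize(x, split = "median")

# split into four groups of roughly equal size, based on quantiles
categorize(x, split = "quantile", n_groups = 4)
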
83 changes: 42 additions & 41 deletions R/contrs.R
@@ -32,58 +32,59 @@
#'
#' @seealso [stats::contr.sum()]
#'
#' @examples
#' if (FALSE) {
#' data("mtcars")
#' @examplesIf !identical(Sys.getenv("IN_PKGDOWN"), "true")
#' \donttest{
#' data("mtcars")
#'
#' mtcars <- data_modify(mtcars, cyl = factor(cyl))
#'
#' c.treatment <- cbind(Intercept = 1, contrasts(mtcars$cyl))
#' solve(c.treatment)
#' #>           4 6 8
#' #> Intercept 1 0 0  # mean of the 1st level
#' #> 6        -1 1 0  # 2nd level - 1st level
#' #> 8        -1 0 1  # 3rd level - 1st level
#'
#' contrasts(mtcars$cyl) <- contr.sum
#' c.sum <- cbind(Intercept = 1, contrasts(mtcars$cyl))
#' solve(c.sum)
#' #>                4      6      8
#' #> Intercept  0.333  0.333  0.333  # overall mean
#' #>            0.667 -0.333 -0.333  # deviation of 1st from overall mean
#' #>           -0.333  0.667 -0.333  # deviation of 2nd from overall mean
#'
#'
#' contrasts(mtcars$cyl) <- contr.deviation
#' c.deviation <- cbind(Intercept = 1, contrasts(mtcars$cyl))
#' solve(c.deviation)
#' #>                4     6     8
#' #> Intercept  0.333 0.333 0.333  # overall mean
#' #> 6         -1.000 1.000 0.000  # 2nd level - 1st level
#' #> 8         -1.000 0.000 1.000  # 3rd level - 1st level
#'
#' ## With Interactions -----------------------------------------
#' mtcars <- data_modify(mtcars, am = C(am, contr = contr.deviation))
#' mtcars <- data_arrange(mtcars, select = c("cyl", "am"))
#'
#' mm <- unique(model.matrix(~ cyl * am, data = mtcars))
#' rownames(mm) <- c(
#'   "cyl4.am0", "cyl4.am1", "cyl6.am0",
#'   "cyl6.am1", "cyl8.am0", "cyl8.am1"
#' )
#'
#' solve(mm)
#' #>             cyl4.am0 cyl4.am1 cyl6.am0 cyl6.am1 cyl8.am0 cyl8.am1
#' #> (Intercept)    0.167    0.167    0.167    0.167    0.167    0.167  # overall mean
#' #> cyl6          -0.500   -0.500    0.500    0.500    0.000    0.000  # cyl MAIN eff: 2nd - 1st
#' #> cyl8          -0.500   -0.500    0.000    0.000    0.500    0.500  # cyl MAIN eff: 3rd - 1st
#' #> am1           -0.333    0.333   -0.333    0.333   -0.333    0.333  # am MAIN eff
#' #> cyl6:am1       1.000   -1.000   -1.000    1.000    0.000    0.000
#' #> cyl8:am1       1.000   -1.000    0.000    0.000   -1.000    1.000
#' }
#'
#' @export
contr.deviation <- function(n, base = 1,
contr.deviation <- function(n,
base = 1,
contrasts = TRUE,
sparse = FALSE) {
cont <- stats::contr.treatment(n,
4 changes: 4 additions & 0 deletions R/convert_to_na.R
@@ -105,7 +105,11 @@ convert_to_na.factor <- function(x, na = NULL, drop_levels = FALSE, verbose = TR
# drop unused labels
value_labels <- attr(x, "labels", exact = TRUE)
if (is.factor(x) && isTRUE(drop_levels)) {
# save label attribute
variable_label <- attr(x, "label", exact = TRUE)
x <- droplevels(x)
# droplevels() discards attributes, so we need to re-assign them
attr(x, "label") <- variable_label
}
attr(x, "labels") <- value_labels[!value_labels %in% na]
}
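
The save-and-restore added here is needed because `droplevels()` rebuilds the factor and discards custom attributes such as the variable label. A small base-R illustration of the behavior the new lines guard against (the data and label are made up):

f <- factor(c("a", "b", "b"), levels = c("a", "b", "unused"))
attr(f, "label") <- "my variable label"

g <- droplevels(f)
attr(g, "label", exact = TRUE)
#> NULL  -- the label is lost, hence the re-assignment in convert_to_na.factor()
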
6 changes: 3 additions & 3 deletions R/data_arrange.R
@@ -46,16 +46,16 @@ data_arrange.default <- function(data, select = NULL, safe = TRUE) {
dont_exist <- select[which(!select %in% names(data))]

if (length(dont_exist) > 0) {
if (!safe) {
insight::format_error(
if (safe) {
insight::format_warning(
paste0(
"The following column(s) don't exist in the dataset: ",
text_concatenate(dont_exist), "."
),
.misspelled_string(names(data), dont_exist, "Possibly misspelled?")
)
} else {
insight::format_warning(
insight::format_error(
paste0(
"The following column(s) don't exist in the dataset: ",
text_concatenate(dont_exist), "."
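
With the swapped branches, `safe = TRUE` (the default, per the function signature above) now downgrades unknown column names to a warning, while `safe = FALSE` raises an error. A quick sketch of the resulting behavior; the exact messages may differ:

library(datawizard)

# safe = TRUE: an unknown column name only triggers a warning
data_arrange(mtcars, select = c("mpg", "not_a_column"), safe = TRUE)

# safe = FALSE: the same call now stops with an error
try(data_arrange(mtcars, select = c("mpg", "not_a_column"), safe = FALSE))
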
10 changes: 8 additions & 2 deletions R/data_codebook.R
@@ -456,8 +456,14 @@ print_md.data_codebook <- function(x, ...) {
# need to remove this one
x$Prop <- NULL
align <- c(
"ID" = "l", "Name" = "l", "Label" = "l", "Type" = "l", "Missings" = "r",
"Values" = "r", "Value Labels" = "l", "N" = "r"
ID = "l",
Name = "l",
Label = "l",
Type = "l",
Missings = "r",
Values = "r",
`Value Labels` = "l",
N = "r"
)
align <- align[colnames(x)]
paste0(unname(align), collapse = "")
8 changes: 4 additions & 4 deletions R/data_extract.R
@@ -114,10 +114,10 @@ data_extract.data.frame <- function(data,

# chose which matched variables to extract
select <- switch(extract,
"first" = select[1L],
"last" = select[length(select)],
"odd" = select[seq(1L, length(select), 2L)],
"even" = select[seq(2L, length(select), 2L)],
first = select[1L],
last = select[length(select)],
odd = select[seq(1L, length(select), 2L)],
even = select[seq(2L, length(select), 2L)],
select
)

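
The `extract` values in this `switch()` (again only the quoting style changed) decide which of several matched columns `data_extract()` returns. A short usage sketch on `iris`; it assumes the `starts_with()` select helper is available through datawizard's column-selection syntax:

library(datawizard)

# several columns match "Sepal"; 'extract' picks which matches to return
data_extract(iris, select = starts_with("Sepal"), extract = "first")  # first match only
data_extract(iris, select = starts_with("Sepal"), extract = "last")   # last match only
data_extract(iris, select = starts_with("Sepal"), extract = "odd")    # 1st, 3rd, ... match
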
(Diffs for the remaining changed files are not shown here.)
