Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

degroup() for cross-classified data #521

Merged
merged 24 commits into from
Jul 1, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# datawizard 0.11.1
# datawizard 0.11.0.1

BREAKING CHANGES

Expand Down
15 changes: 6 additions & 9 deletions R/demean.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
#'
#' @note
#' Variables specified in `by` or `select` that could not be found in the data
#' arte ignored. A message is printed, indicating the variables that were not
#' are ignored. A message is printed, indicating the variables that were not
#' found.
#'
#' @section Heterogeneity Bias:
Expand Down Expand Up @@ -321,8 +321,8 @@ degroup <- function(x,

not_found <- setdiff(c(select, by), colnames(x))

if (length(not_found) && isTRUE(verbose)) {
insight::format_alert(
if (length(not_found)) {
insight::format_error(
paste0(
strengejacke marked this conversation as resolved.
Show resolved Hide resolved
"Variable",
ifelse(length(not_found) > 1, "s ", " "),
Expand All @@ -334,10 +334,6 @@ degroup <- function(x,
)
}

# make sure we have only valid variables
select <- intersect(colnames(x), select)
by <- intersect(colnames(x), by)

strengejacke marked this conversation as resolved.
Show resolved Hide resolved
# get data to demean...
dat <- x[, c(select, by)]

Expand Down Expand Up @@ -408,7 +404,6 @@ degroup <- function(x,
names(group_means_list) <- select
# create de-meaned variables by subtracting the group mean from each individual value
person_means_list <- lapply(select, function(i) dat[[i]] - group_means_list[[i]])
names(person_means_list) <- select
} else {
# cross-classified design: by > 1
group_means_list <- lapply(by, function(j) {
Expand All @@ -424,9 +419,11 @@ degroup <- function(x,
sum_group_means <- do.call(`+`, lapply(group_means_list, function(j) j[[i]]))
dat[[select[i]]] - sum_group_means
})
names(person_means_list) <- select
}

# preserve names
names(person_means_list) <- select

# convert to data frame and add suffix to column names

group_means <- as.data.frame(group_means_list)
Expand Down
19 changes: 12 additions & 7 deletions man/demean.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions tests/testthat/test-center.R
Original file line number Diff line number Diff line change
Expand Up @@ -169,8 +169,7 @@ test_that("center, factors (grouped data)", {
poorman::ungroup() %>%
poorman::pull(Species)

manual <- iris %>%
poorman::pull(Species)
manual <- poorman::pull(iris, Species)

expect_identical(datawizard, manual)
})
Expand Down
34 changes: 17 additions & 17 deletions tests/testthat/test-demean.R
Original file line number Diff line number Diff line change
Expand Up @@ -66,24 +66,24 @@ test_that("demean shows message if some vars don't exist", {

# see issue #520
test_that("demean for cross-classified designs (by > 1)", {
skip_if(getRversion() < "4.1.0") # for pipe
skip_if_not_installed("poorman")

data(efc, package = "datawizard")
dat <- na.omit(efc)
dat$e42dep <- factor(dat$e42dep)
dat$c172code <- factor(dat$c172code)

x2a <- dat |>
data_group(e42dep) |>
x2a <- dat %>%
data_group(e42dep) %>%
data_modify(
c12hour_e42dep = mean(c12hour)
) |>
data_ungroup() |>
data_group(c172code) |>
) %>%
data_ungroup() %>%
data_group(c172code) %>%
data_modify(
c12hour_c172code = mean(c12hour)
) |>
data_ungroup() |>
) %>%
data_ungroup() %>%
data_modify(
c12hour_within = c12hour - c12hour_e42dep - c12hour_c172code
)
Expand All @@ -108,19 +108,19 @@ test_that("demean for cross-classified designs (by > 1)", {
ignore_attr = TRUE
)

x2a <- dat |>
data_group(e42dep) |>
x2a <- dat %>%
data_group(e42dep) %>%
data_modify(
c12hour_e42dep = mean(c12hour, na.rm = TRUE),
neg_c_7_e42dep = mean(neg_c_7, na.rm = TRUE)
) |>
data_ungroup() |>
data_group(c172code) |>
) %>%
data_ungroup() %>%
data_group(c172code) %>%
data_modify(
c12hour_c172code = mean(c12hour, na.rm = TRUE),
neg_c_7_c172code = mean(neg_c_7, na.rm = TRUE)
) |>
data_ungroup() |>
) %>%
data_ungroup() %>%
data_modify(
c12hour_within = c12hour - c12hour_e42dep - c12hour_c172code,
neg_c_7_within = neg_c_7 - neg_c_7_e42dep - neg_c_7_c172code
Expand Down Expand Up @@ -165,7 +165,7 @@ test_that("demean, sanity checks", {
dat$e42dep <- factor(dat$e42dep)
dat$c172code <- factor(dat$c172code)

expect_message(
expect_error(
degroup(
dat,
select = c("c12hour", "neg_c_8"),
Expand All @@ -174,7 +174,7 @@ test_that("demean, sanity checks", {
),
regex = "Variable \"neg_c_8\" was not found"
)
expect_message(
expect_error(
degroup(
dat,
select = c("c12hour", "neg_c_8"),
Expand Down
Loading