
Commit

n_factors(): Add Variance_Cumulative (explained variance) to summary (#921)

* Add Variance_Cumulative (explained variance) to summary of n_factors()

* please lintr

* add also as attribute

* add test

* update description

* news

* fix test

* implement #755

* fix spca

* fix?

* fix

* simplify

* styler

---------

Co-authored-by: Daniel <[email protected]>
DominiqueMakowski and strengejacke authored Dec 3, 2023
1 parent 242610b commit a34dfd5
Showing 7 changed files with 70 additions and 74 deletions.
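
For reference, a minimal usage sketch of what this commit adds (the call and attribute names mirror the test added in tests/testthat/test-n_factors.R below; the nFactors and psych packages are assumed to be installed, and the exact values depend on the data):

library(parameters)

x <- n_factors(mtcars[, 1:4], type = "PCA")

# New in this commit: cumulative explained variance is attached as attributes
attributes(x)$Variance_Explained$Variance_Cumulative
# The "summary" attribute now also carries a Variance_Cumulative column
attributes(x)$summary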
2 changes: 1 addition & 1 deletion DESCRIPTION
@@ -1,7 +1,7 @@
Type: Package
Package: parameters
Title: Processing of Model Parameters
Version: 0.21.3.4
Version: 0.21.3.5
Authors@R:
c(person(given = "Daniel",
family = "Lüdecke",
3 changes: 3 additions & 0 deletions NEWS.md
@@ -11,6 +11,9 @@
* `model_parameters()` for models of package *survey* now gives informative
messages when `bootstrap = TRUE` (which is currently not supported).

* `n_factors()` now also returns the explained variance for the number of
factors as attributes.

# parameters 0.21.3

## Changes
9 changes: 9 additions & 0 deletions R/n_factors.R
@@ -340,6 +340,15 @@ n_factors <- function(x,
n_Methods = as.numeric(by(out, as.factor(out$n_Factors), function(out) n <- nrow(out)))
)

# Add cumulative percentage of variance explained
fa <- factor_analysis(x, cor = cor, n = max(by_factors$n_Factors)) # Get it from our fa:: wrapper (TODO: that's probably not the most efficient)
varex <- attributes(fa)$summary
# Extract number of factors from EFA output (usually MR1, ML1, etc.)
varex$n_Factors <- as.numeric(gsub("[^\\d]+", "", varex$Component, perl = TRUE))
# Merge (and like that filter out empty methods)
by_factors <- merge(by_factors, varex[, c("n_Factors", "Variance_Cumulative")], by = "n_Factors")

attr(out, "Variance_Explained") <- varex # We add all the variance explained (for plotting)
attr(out, "summary") <- by_factors
attr(out, "n") <- min(as.numeric(as.character(
by_factors[by_factors$n_Methods == max(by_factors$n_Methods), "n_Factors"]
16 changes: 8 additions & 8 deletions R/standardize_info.R
@@ -382,12 +382,12 @@ standardize_info.default <- function(model,
}

if (info$is_linear) {
if (!robust) {
sd_y <- datawizard::weighted_sd(response, w)
mean_y <- datawizard::weighted_mean(response, w)
} else {
if (robust) {
sd_y <- datawizard::weighted_mad(response, w)
mean_y <- datawizard::weighted_median(response, w)
} else {
sd_y <- datawizard::weighted_sd(response, w)
mean_y <- datawizard::weighted_mean(response, w)
}
} else {
sd_y <- 1
@@ -567,12 +567,12 @@ standardize_info.default <- function(model,
response <- as.numeric(data[, variable])
}

if (!robust) {
sd_x <- datawizard::weighted_sd(response, weights)
mean_x <- datawizard::weighted_mean(response, weights)
} else {
if (robust) {
sd_x <- datawizard::weighted_mad(response, weights)
mean_x <- datawizard::weighted_median(response, weights)
} else {
sd_x <- datawizard::weighted_sd(response, weights)
mean_x <- datawizard::weighted_mean(response, weights)
}

list(sd = f * sd_x, mean = mean_x)
57 changes: 27 additions & 30 deletions R/utils_pca_efa.R
@@ -107,7 +107,7 @@ summary.parameters_efa <- function(object, ...) {


x <- as.data.frame(t(x[, cols]))
x <- cbind(data.frame("Parameter" = row.names(x), stringsAsFactors = FALSE), x)
x <- cbind(data.frame(Parameter = row.names(x), stringsAsFactors = FALSE), x)
names(x) <- c("Parameter", attributes(object)$summary$Component)
row.names(x) <- NULL

@@ -149,40 +149,37 @@ predict.parameters_efa <- function(object,
verbose = TRUE,
...) {
attri <- attributes(object)
if (is.null(newdata)) {
if ("scores" %in% names(attri)) {
out <- as.data.frame(attri$scores)
if (isTRUE(keep_na)) {
out <- .merge_na(object, out, verbose)
}
} else {
if ("dataset" %in% names(attri)) {
out <- as.data.frame(stats::predict(attri$model, data = attri$dataset))

if (inherits(attri$model, c("psych", "principal", "psych", "fa"))) {
if (is.null(newdata)) {
if ("scores" %in% names(attri)) {
out <- as.data.frame(attri$scores)
if (isTRUE(keep_na)) {
# Because pre-made scores don't preserve NA
out <- .merge_na(object, out)
}
} else {
insight::format_error(
"Could not retrieve data nor model. Please report an issue on {.url https://github.com/easystats/parameters/issues}."
)
d <- attri$data_set
d <- d[vapply(d, is.numeric, logical(1))]
out <- as.data.frame(stats::predict(attri$model, data = d))
}
}
} else {
if (inherits(attri$model, c("psych", "fa"))) {
# Clean-up newdata (keep only the variables used in the model)
newdata <- newdata[names(attri$model$complexity)] # assuming "complexity" info is there
# psych:::predict.fa(object, data)
out <- as.data.frame(stats::predict(attri$model, data = newdata))
} else if (inherits(attri$model, "spca")) {
# https://github.com/erichson/spca/issues/7
newdata <- newdata[names(attri$model$center)]
if (attri$standardize) {
newdata <- sweep(newdata, MARGIN = 2, STATS = attri$model$center, FUN = "-", check.margin = TRUE)
newdata <- sweep(newdata, MARGIN = 2, STATS = attri$model$scale, FUN = "/", check.margin = TRUE)
}
out <- as.matrix(newdata) %*% as.matrix(attri$model$loadings)
out <- stats::setNames(as.data.frame(out), paste0("Component", seq_len(ncol(out))))
} else {
out <- as.data.frame(stats::predict(attri$model, newdata = newdata, ...))
# psych:::predict.principal(object, data)
out <- stats::predict(attri$model, data = newdata)
}
} else if (inherits(attri$model, "spca")) {
# https://github.com/erichson/spca/issues/7
newdata <- newdata[names(attri$model$center)]
if (attri$standardize) {
newdata <- sweep(newdata, MARGIN = 2, STATS = attri$model$center, FUN = "-", check.margin = TRUE)
newdata <- sweep(newdata, MARGIN = 2, STATS = attri$model$scale, FUN = "/", check.margin = TRUE)
}
out <- as.matrix(newdata) %*% as.matrix(attri$model$loadings)
out <- stats::setNames(as.data.frame(out), paste0("Component", seq_len(ncol(out))))
} else {
out <- as.data.frame(stats::predict(attri$model, newdata = attri$dataset, ...))
}

if (!is.null(names)) {
names(out)[seq_along(names)] <- names
}
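
A rough usage sketch of the predict() path reworked above (data and calls mirror the tests in tests/testthat/test-pca.R below; the psych and GPArotation packages are assumed to be installed):

d <- na.omit(psych::bfi[, 1:25])
model <- psych::fa(d, nfactors = 5)
mp <- model_parameters(model, sort = TRUE, threshold = "max")

predict(mp)                       # pre-computed scores; keep_na = TRUE merges NA rows back in
predict(mp, keep_na = FALSE)      # complete cases only
predict(mp, newdata = d[1:10, ])  # new data is routed through psych's predict() method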
13 changes: 13 additions & 0 deletions tests/testthat/test-n_factors.R
@@ -71,3 +71,16 @@ test_that("n_factors, no rotation, psych only", {
)
)
})

test_that("n_factors, variance explained", {
skip_on_cran()
skip_if_not_installed("nFactors")
skip_if_not_installed("psych")
set.seed(333)
x <- n_factors(mtcars[, 1:4], type = "PCA")
expect_equal(
attributes(x)$Variance_Explained$Variance_Cumulative,
c(0.84126, 0.85088, 0.85859, 0.85859),
tolerance = 1e-4
)
})
44 changes: 9 additions & 35 deletions tests/testthat/test-pca.R
@@ -29,16 +29,16 @@ test_that("principal_components", {

test_that("principal_components, n", {
data(iris)
x <- parameters::principal_components(iris[1:4], n = 2)
x <- principal_components(iris[1:4], n = 2)
expect_named(x, c("Variable", "PC1", "PC2", "Complexity"))

x <- parameters::principal_components(iris[1:4], n = 1)
x <- principal_components(iris[1:4], n = 1)
expect_named(x, c("Variable", "PC1", "Complexity"))
})


test_that("principal_components", {
x <- parameters::principal_components(mtcars[, 1:7])
x <- principal_components(mtcars[, 1:7])

expect_equal(
x$PC1,
@@ -55,38 +55,17 @@ test_that("principal_components", {
)

expect_named(x, c("Variable", "PC1", "PC2", "Complexity"))
})

test_that("principal_components", {
x <- model_parameters(principal_components(mtcars[, 1:7], nfactors = 2))
expect_equal(
x$RC1,
c(
-0.836114674884308,
0.766808147590597,
0.85441780762136,
0.548502661888057,
-0.889046093964722,
0.931879020871552,
-0.030485507571411
),
tolerance = 0.01
)

expect_named(x, c("Variable", "RC1", "RC2", "Complexity", "Uniqueness"))
expect_identical(dim(suppressWarnings(predict(x))), c(32L, 2L))
expect_identical(dim(suppressWarnings(predict(x, newdata = mtcars[1:3, 1:7]))), c(3L, 2L))
expect_identical(dim(predict(x)), c(32L, 2L))
})


# predict ----------------------
# N.B tests will fail if `GPArotation` package is not installed

d <- na.omit(psych::bfi[, 1:25])
model <- psych::fa(d, nfactors = 5)
mp <- model_parameters(model, sort = TRUE, threshold = "max")

test_that("predict model_parameters fa", {
d <- na.omit(psych::bfi[, 1:25])
model <- psych::fa(d, nfactors = 5)
mp <- model_parameters(model, sort = TRUE, threshold = "max")
pr <- suppressMessages(
predict(mp, names = c("Neuroticism", "Conscientiousness", "Extraversion", "Agreeableness", "Opennness"))
)
@@ -103,17 +82,12 @@ test_that("predict model_parameters fa", {
)
expect_identical(nrow(predict(mp, keep_na = FALSE)), 2436L)
expect_identical(nrow(predict(mp, newdata = d[1:10, ], keep_na = FALSE)), 10L)
})


test_that("predict factor_analysis", {
model <- factor_analysis(d, n = 5)
expect_identical(nrow(predict(model, keep_na = FALSE)), 2436L)
expect_identical(nrow(predict(mp, newdata = d[1:10, ], keep_na = FALSE)), 10L)
expect_named(
predict(mp, names = c("A", "B", "C", "D", "E"), keep_na = FALSE),
c("A", "B", "C", "D", "E")
)
model <- factor_analysis(d, n = 5)
expect_identical(nrow(predict(model, keep_na = FALSE)), 2436L)
})

unloadNamespace("GPArotation")
