From a34dfd5ac5f48ee553e25243e2303a6614bc67d9 Mon Sep 17 00:00:00 2001 From: Dominique Makowski Date: Sun, 3 Dec 2023 12:39:13 +0000 Subject: [PATCH] n_factors(): Add Variance_Cumulative (explained variance) to summary (#921) * Add Variance_Cumulative (explained variance) to summary of n_factors() * please lintr * add also as attribute * add test * update description * news * fix test * implement #755 * fix spca * fix? * fix * simplify * styler --------- Co-authored-by: Daniel --- DESCRIPTION | 2 +- NEWS.md | 3 ++ R/n_factors.R | 9 ++++++ R/standardize_info.R | 16 ++++----- R/utils_pca_efa.R | 57 ++++++++++++++++----------------- tests/testthat/test-n_factors.R | 13 ++++++++ tests/testthat/test-pca.R | 44 ++++++------------------- 7 files changed, 70 insertions(+), 74 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index de8902ec4..731de5c86 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: parameters Title: Processing of Model Parameters -Version: 0.21.3.4 +Version: 0.21.3.5 Authors@R: c(person(given = "Daniel", family = "Lüdecke", diff --git a/NEWS.md b/NEWS.md index 778eb912a..b8cb37e53 100644 --- a/NEWS.md +++ b/NEWS.md @@ -11,6 +11,9 @@ * `model_parameters()` for models of package *survey* now gives informative messages when `bootstrap = TRUE` (which is currently not supported). +* `n_factors()` now also returns the explained variance for the number of + factors as attributes. + # parameters 0.21.3 ## Changes diff --git a/R/n_factors.R b/R/n_factors.R index ca70ee042..9b6c335af 100644 --- a/R/n_factors.R +++ b/R/n_factors.R @@ -340,6 +340,15 @@ n_factors <- function(x, n_Methods = as.numeric(by(out, as.factor(out$n_Factors), function(out) n <- nrow(out))) ) + # Add cumulative percentage of variance explained + fa <- factor_analysis(x, cor = cor, n = max(by_factors$n_Factors)) # Get it from our fa:: wrapper (TODO: that's probably not the most efficient) + varex <- attributes(fa)$summary + # Extract number of factors from EFA output (usually MR1, ML1, etc.) + varex$n_Factors <- as.numeric(gsub("[^\\d]+", "", varex$Component, perl = TRUE)) + # Merge (and like that filter out empty methods) + by_factors <- merge(by_factors, varex[, c("n_Factors", "Variance_Cumulative")], by = "n_Factors") + + attr(out, "Variance_Explained") <- varex # We add all the variance explained (for plotting) attr(out, "summary") <- by_factors attr(out, "n") <- min(as.numeric(as.character( by_factors[by_factors$n_Methods == max(by_factors$n_Methods), "n_Factors"] diff --git a/R/standardize_info.R b/R/standardize_info.R index 9993c60b2..0d6fe5b9f 100644 --- a/R/standardize_info.R +++ b/R/standardize_info.R @@ -382,12 +382,12 @@ standardize_info.default <- function(model, } if (info$is_linear) { - if (!robust) { - sd_y <- datawizard::weighted_sd(response, w) - mean_y <- datawizard::weighted_mean(response, w) - } else { + if (robust) { sd_y <- datawizard::weighted_mad(response, w) mean_y <- datawizard::weighted_median(response, w) + } else { + sd_y <- datawizard::weighted_sd(response, w) + mean_y <- datawizard::weighted_mean(response, w) } } else { sd_y <- 1 @@ -567,12 +567,12 @@ standardize_info.default <- function(model, response <- as.numeric(data[, variable]) } - if (!robust) { - sd_x <- datawizard::weighted_sd(response, weights) - mean_x <- datawizard::weighted_mean(response, weights) - } else { + if (robust) { sd_x <- datawizard::weighted_mad(response, weights) mean_x <- datawizard::weighted_median(response, weights) + } else { + sd_x <- datawizard::weighted_sd(response, weights) + mean_x <- datawizard::weighted_mean(response, weights) } list(sd = f * sd_x, mean = mean_x) diff --git a/R/utils_pca_efa.R b/R/utils_pca_efa.R index 56bfaf932..b8d0211fa 100644 --- a/R/utils_pca_efa.R +++ b/R/utils_pca_efa.R @@ -107,7 +107,7 @@ summary.parameters_efa <- function(object, ...) { x <- as.data.frame(t(x[, cols])) - x <- cbind(data.frame("Parameter" = row.names(x), stringsAsFactors = FALSE), x) + x <- cbind(data.frame(Parameter = row.names(x), stringsAsFactors = FALSE), x) names(x) <- c("Parameter", attributes(object)$summary$Component) row.names(x) <- NULL @@ -149,40 +149,37 @@ predict.parameters_efa <- function(object, verbose = TRUE, ...) { attri <- attributes(object) - if (is.null(newdata)) { - if ("scores" %in% names(attri)) { - out <- as.data.frame(attri$scores) - if (isTRUE(keep_na)) { - out <- .merge_na(object, out, verbose) - } - } else { - if ("dataset" %in% names(attri)) { - out <- as.data.frame(stats::predict(attri$model, data = attri$dataset)) + + if (inherits(attri$model, c("psych", "principal", "psych", "fa"))) { + if (is.null(newdata)) { + if ("scores" %in% names(attri)) { + out <- as.data.frame(attri$scores) + if (isTRUE(keep_na)) { + # Because pre-made scores don't preserve NA + out <- .merge_na(object, out) + } } else { - insight::format_error( - "Could not retrieve data nor model. Please report an issue on {.url https://github.com/easystats/parameters/issues}." - ) + d <- attri$data_set + d <- d[vapply(d, is.numeric, logical(1))] + out <- as.data.frame(stats::predict(attri$model, data = d)) } - } - } else { - if (inherits(attri$model, c("psych", "fa"))) { - # Clean-up newdata (keep only the variables used in the model) - newdata <- newdata[names(attri$model$complexity)] # assuming "complexity" info is there - # psych:::predict.fa(object, data) - out <- as.data.frame(stats::predict(attri$model, data = newdata)) - } else if (inherits(attri$model, "spca")) { - # https://github.com/erichson/spca/issues/7 - newdata <- newdata[names(attri$model$center)] - if (attri$standardize) { - newdata <- sweep(newdata, MARGIN = 2, STATS = attri$model$center, FUN = "-", check.margin = TRUE) - newdata <- sweep(newdata, MARGIN = 2, STATS = attri$model$scale, FUN = "/", check.margin = TRUE) - } - out <- as.matrix(newdata) %*% as.matrix(attri$model$loadings) - out <- stats::setNames(as.data.frame(out), paste0("Component", seq_len(ncol(out)))) } else { - out <- as.data.frame(stats::predict(attri$model, newdata = newdata, ...)) + # psych:::predict.principal(object, data) + out <- stats::predict(attri$model, data = newdata) + } + } else if (inherits(attri$model, "spca")) { + # https://github.com/erichson/spca/issues/7 + newdata <- newdata[names(attri$model$center)] + if (attri$standardize) { + newdata <- sweep(newdata, MARGIN = 2, STATS = attri$model$center, FUN = "-", check.margin = TRUE) + newdata <- sweep(newdata, MARGIN = 2, STATS = attri$model$scale, FUN = "/", check.margin = TRUE) } + out <- as.matrix(newdata) %*% as.matrix(attri$model$loadings) + out <- stats::setNames(as.data.frame(out), paste0("Component", seq_len(ncol(out)))) + } else { + out <- as.data.frame(stats::predict(attri$model, newdata = attri$dataset, ...)) } + if (!is.null(names)) { names(out)[seq_along(names)] <- names } diff --git a/tests/testthat/test-n_factors.R b/tests/testthat/test-n_factors.R index 5480fdfaa..41198e9f1 100644 --- a/tests/testthat/test-n_factors.R +++ b/tests/testthat/test-n_factors.R @@ -71,3 +71,16 @@ test_that("n_factors, no rotation, psych only", { ) ) }) + +test_that("n_factors, variance explained", { + skip_on_cran() + skip_if_not_installed("nFactors") + skip_if_not_installed("psych") + set.seed(333) + x <- n_factors(mtcars[, 1:4], type = "PCA") + expect_equal( + attributes(x)$Variance_Explained$Variance_Cumulative, + c(0.84126, 0.85088, 0.85859, 0.85859), + tolerance = 1e-4 + ) +}) diff --git a/tests/testthat/test-pca.R b/tests/testthat/test-pca.R index 546504693..014a3c102 100644 --- a/tests/testthat/test-pca.R +++ b/tests/testthat/test-pca.R @@ -29,16 +29,16 @@ test_that("principal_components", { test_that("principal_components, n", { data(iris) - x <- parameters::principal_components(iris[1:4], n = 2) + x <- principal_components(iris[1:4], n = 2) expect_named(x, c("Variable", "PC1", "PC2", "Complexity")) - x <- parameters::principal_components(iris[1:4], n = 1) + x <- principal_components(iris[1:4], n = 1) expect_named(x, c("Variable", "PC1", "Complexity")) }) test_that("principal_components", { - x <- parameters::principal_components(mtcars[, 1:7]) + x <- principal_components(mtcars[, 1:7]) expect_equal( x$PC1, @@ -55,38 +55,17 @@ test_that("principal_components", { ) expect_named(x, c("Variable", "PC1", "PC2", "Complexity")) -}) - -test_that("principal_components", { - x <- model_parameters(principal_components(mtcars[, 1:7], nfactors = 2)) - expect_equal( - x$RC1, - c( - -0.836114674884308, - 0.766808147590597, - 0.85441780762136, - 0.548502661888057, - -0.889046093964722, - 0.931879020871552, - -0.030485507571411 - ), - tolerance = 0.01 - ) - - expect_named(x, c("Variable", "RC1", "RC2", "Complexity", "Uniqueness")) - expect_identical(dim(suppressWarnings(predict(x))), c(32L, 2L)) - expect_identical(dim(suppressWarnings(predict(x, newdata = mtcars[1:3, 1:7]))), c(3L, 2L)) + expect_identical(dim(predict(x)), c(32L, 2L)) }) # predict ---------------------- # N.B tests will fail if `GPArotation` package is not installed -d <- na.omit(psych::bfi[, 1:25]) -model <- psych::fa(d, nfactors = 5) -mp <- model_parameters(model, sort = TRUE, threshold = "max") - test_that("predict model_parameters fa", { + d <- na.omit(psych::bfi[, 1:25]) + model <- psych::fa(d, nfactors = 5) + mp <- model_parameters(model, sort = TRUE, threshold = "max") pr <- suppressMessages( predict(mp, names = c("Neuroticism", "Conscientiousness", "Extraversion", "Agreeableness", "Opennness")) ) @@ -103,17 +82,12 @@ test_that("predict model_parameters fa", { ) expect_identical(nrow(predict(mp, keep_na = FALSE)), 2436L) expect_identical(nrow(predict(mp, newdata = d[1:10, ], keep_na = FALSE)), 10L) -}) - - -test_that("predict factor_analysis", { - model <- factor_analysis(d, n = 5) - expect_identical(nrow(predict(model, keep_na = FALSE)), 2436L) - expect_identical(nrow(predict(mp, newdata = d[1:10, ], keep_na = FALSE)), 10L) expect_named( predict(mp, names = c("A", "B", "C", "D", "E"), keep_na = FALSE), c("A", "B", "C", "D", "E") ) + model <- factor_analysis(d, n = 5) + expect_identical(nrow(predict(model, keep_na = FALSE)), 2436L) }) unloadNamespace("GPArotation")