
Commit

n_factors(): Add Variance_Cumulative (explained variance) to summary (#921)

* Add Variance_Cumulative (explained variance) to summary of n_factors()

* please lintr

* add also as attribute

* add test

* update description

* news

* fix test

* implement #755

* fix spca

* fix?

* fix

* simplify

* styler

---------

Co-authored-by: Daniel <[email protected]>
DominiqueMakowski and strengejacke authored Dec 3, 2023
1 parent 242610b commit a34dfd5
Showing 7 changed files with 70 additions and 74 deletions.
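
For reference, a minimal usage sketch of what this commit adds (the call and attribute names mirror the test added in tests/testthat/test-n_factors.R below; the nFactors and psych packages are assumed to be installed, and the exact values depend on the data):

library(parameters)

x <- n_factors(mtcars[, 1:4], type = "PCA")

# New in this commit: cumulative explained variance is attached as attributes
attributes(x)$Variance_Explained$Variance_Cumulative
# The "summary" attribute now also carries a Variance_Cumulative column
attributes(x)$summary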
2 changes: 1 addition & 1 deletion DESCRIPTION
@@ -1,7 +1,7 @@
Type: Package
Package: parameters
Title: Processing of Model Parameters
Version: 0.21.3.4
Version: 0.21.3.5
Authors@R:
c(person(given = "Daniel",
family = "Lüdecke",
3 changes: 3 additions & 0 deletions NEWS.md
@@ -11,6 +11,9 @@
* `model_parameters()` for models of package *survey* now gives informative
messages when `bootstrap = TRUE` (which is currently not supported).

* `n_factors()` now also returns the explained variance for the number of
factors as attributes.

# parameters 0.21.3

## Changes
9 changes: 9 additions & 0 deletions R/n_factors.R
@@ -340,6 +340,15 @@ n_factors <- function(x,
n_Methods = as.numeric(by(out, as.factor(out$n_Factors), function(out) n <- nrow(out)))
)

# Add cumulative percentage of variance explained
fa <- factor_analysis(x, cor = cor, n = max(by_factors$n_Factors)) # Get it from our fa:: wrapper (TODO: that's probably not the most efficient)
varex <- attributes(fa)$summary
# Extract number of factors from EFA output (usually MR1, ML1, etc.)
varex$n_Factors <- as.numeric(gsub("[^\\d]+", "", varex$Component, perl = TRUE))
# Merge (and like that filter out empty methods)
by_factors <- merge(by_factors, varex[, c("n_Factors", "Variance_Cumulative")], by = "n_Factors")

attr(out, "Variance_Explained") <- varex # We add all the variance explained (for plotting)
attr(out, "summary") <- by_factors
attr(out, "n") <- min(as.numeric(as.character(
by_factors[by_factors$n_Methods == max(by_factors$n_Methods), "n_Factors"]
16 changes: 8 additions & 8 deletions R/standardize_info.R
@@ -382,12 +382,12 @@ standardize_info.default <- function(model,
}

if (info$is_linear) {
if (!robust) {
sd_y <- datawizard::weighted_sd(response, w)
mean_y <- datawizard::weighted_mean(response, w)
} else {
if (robust) {
sd_y <- datawizard::weighted_mad(response, w)
mean_y <- datawizard::weighted_median(response, w)
} else {
sd_y <- datawizard::weighted_sd(response, w)
mean_y <- datawizard::weighted_mean(response, w)
}
} else {
sd_y <- 1
@@ -567,12 +567,12 @@ standardize_info.default <- function(model,
response <- as.numeric(data[, variable])
}

if (!robust) {
sd_x <- datawizard::weighted_sd(response, weights)
mean_x <- datawizard::weighted_mean(response, weights)
} else {
if (robust) {
sd_x <- datawizard::weighted_mad(response, weights)
mean_x <- datawizard::weighted_median(response, weights)
} else {
sd_x <- datawizard::weighted_sd(response, weights)
mean_x <- datawizard::weighted_mean(response, weights)
}

list(sd = f * sd_x, mean = mean_x)
57 changes: 27 additions & 30 deletions R/utils_pca_efa.R
@@ -107,7 +107,7 @@ summary.parameters_efa <- function(object, ...) {


x <- as.data.frame(t(x[, cols]))
x <- cbind(data.frame("Parameter" = row.names(x), stringsAsFactors = FALSE), x)
x <- cbind(data.frame(Parameter = row.names(x), stringsAsFactors = FALSE), x)
names(x) <- c("Parameter", attributes(object)$summary$Component)
row.names(x) <- NULL

@@ -149,40 +149,37 @@ predict.parameters_efa <- function(object,
verbose = TRUE,
...) {
attri <- attributes(object)
if (is.null(newdata)) {
if ("scores" %in% names(attri)) {
out <- as.data.frame(attri$scores)
if (isTRUE(keep_na)) {
out <- .merge_na(object, out, verbose)
}
} else {
if ("dataset" %in% names(attri)) {
out <- as.data.frame(stats::predict(attri$model, data = attri$dataset))

if (inherits(attri$model, c("psych", "principal", "psych", "fa"))) {
if (is.null(newdata)) {
if ("scores" %in% names(attri)) {
out <- as.data.frame(attri$scores)
if (isTRUE(keep_na)) {
# Because pre-made scores don't preserve NA
out <- .merge_na(object, out)
}
} else {
insight::format_error(
"Could not retrieve data nor model. Please report an issue on {.url https://github.com/easystats/parameters/issues}."
)
d <- attri$data_set
d <- d[vapply(d, is.numeric, logical(1))]
out <- as.data.frame(stats::predict(attri$model, data = d))
}
}
} else {
if (inherits(attri$model, c("psych", "fa"))) {
# Clean-up newdata (keep only the variables used in the model)
newdata <- newdata[names(attri$model$complexity)] # assuming "complexity" info is there
# psych:::predict.fa(object, data)
out <- as.data.frame(stats::predict(attri$model, data = newdata))
} else if (inherits(attri$model, "spca")) {
# https://github.com/erichson/spca/issues/7
newdata <- newdata[names(attri$model$center)]
if (attri$standardize) {
newdata <- sweep(newdata, MARGIN = 2, STATS = attri$model$center, FUN = "-", check.margin = TRUE)
newdata <- sweep(newdata, MARGIN = 2, STATS = attri$model$scale, FUN = "/", check.margin = TRUE)
}
out <- as.matrix(newdata) %*% as.matrix(attri$model$loadings)
out <- stats::setNames(as.data.frame(out), paste0("Component", seq_len(ncol(out))))
} else {
out <- as.data.frame(stats::predict(attri$model, newdata = newdata, ...))
# psych:::predict.principal(object, data)
out <- stats::predict(attri$model, data = newdata)
}
} else if (inherits(attri$model, "spca")) {
# https://github.com/erichson/spca/issues/7
newdata <- newdata[names(attri$model$center)]
if (attri$standardize) {
newdata <- sweep(newdata, MARGIN = 2, STATS = attri$model$center, FUN = "-", check.margin = TRUE)
newdata <- sweep(newdata, MARGIN = 2, STATS = attri$model$scale, FUN = "/", check.margin = TRUE)
}
out <- as.matrix(newdata) %*% as.matrix(attri$model$loadings)
out <- stats::setNames(as.data.frame(out), paste0("Component", seq_len(ncol(out))))
} else {
out <- as.data.frame(stats::predict(attri$model, newdata = attri$dataset, ...))
}

if (!is.null(names)) {
names(out)[seq_along(names)] <- names
}
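
A rough usage sketch of the predict() path reworked above (data and calls mirror the tests in tests/testthat/test-pca.R below; the psych and GPArotation packages are assumed to be installed):

d <- na.omit(psych::bfi[, 1:25])
model <- psych::fa(d, nfactors = 5)
mp <- model_parameters(model, sort = TRUE, threshold = "max")

predict(mp)                       # pre-computed scores; keep_na = TRUE merges NA rows back in
predict(mp, keep_na = FALSE)      # complete cases only
predict(mp, newdata = d[1:10, ])  # new data is routed through psych's predict() method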
13 changes: 13 additions & 0 deletions tests/testthat/test-n_factors.R
@@ -71,3 +71,16 @@ test_that("n_factors, no rotation, psych only", {
)
)
})

test_that("n_factors, variance explained", {
skip_on_cran()
skip_if_not_installed("nFactors")
skip_if_not_installed("psych")
set.seed(333)
x <- n_factors(mtcars[, 1:4], type = "PCA")
expect_equal(
attributes(x)$Variance_Explained$Variance_Cumulative,
c(0.84126, 0.85088, 0.85859, 0.85859),
tolerance = 1e-4
)
})
44 changes: 9 additions & 35 deletions tests/testthat/test-pca.R
@@ -29,16 +29,16 @@ test_that("principal_components", {

test_that("principal_components, n", {
data(iris)
x <- parameters::principal_components(iris[1:4], n = 2)
x <- principal_components(iris[1:4], n = 2)
expect_named(x, c("Variable", "PC1", "PC2", "Complexity"))

x <- parameters::principal_components(iris[1:4], n = 1)
x <- principal_components(iris[1:4], n = 1)
expect_named(x, c("Variable", "PC1", "Complexity"))
})


test_that("principal_components", {
x <- parameters::principal_components(mtcars[, 1:7])
x <- principal_components(mtcars[, 1:7])

expect_equal(
x$PC1,
@@ -55,38 +55,17 @@ test_that("principal_components", {
)

expect_named(x, c("Variable", "PC1", "PC2", "Complexity"))
})

test_that("principal_components", {
x <- model_parameters(principal_components(mtcars[, 1:7], nfactors = 2))
expect_equal(
x$RC1,
c(
-0.836114674884308,
0.766808147590597,
0.85441780762136,
0.548502661888057,
-0.889046093964722,
0.931879020871552,
-0.030485507571411
),
tolerance = 0.01
)

expect_named(x, c("Variable", "RC1", "RC2", "Complexity", "Uniqueness"))
expect_identical(dim(suppressWarnings(predict(x))), c(32L, 2L))
expect_identical(dim(suppressWarnings(predict(x, newdata = mtcars[1:3, 1:7]))), c(3L, 2L))
expect_identical(dim(predict(x)), c(32L, 2L))
})


# predict ----------------------
# N.B tests will fail if `GPArotation` package is not installed

d <- na.omit(psych::bfi[, 1:25])
model <- psych::fa(d, nfactors = 5)
mp <- model_parameters(model, sort = TRUE, threshold = "max")

test_that("predict model_parameters fa", {
d <- na.omit(psych::bfi[, 1:25])
model <- psych::fa(d, nfactors = 5)
mp <- model_parameters(model, sort = TRUE, threshold = "max")
pr <- suppressMessages(
predict(mp, names = c("Neuroticism", "Conscientiousness", "Extraversion", "Agreeableness", "Opennness"))
)
@@ -103,17 +82,12 @@ test_that("predict model_parameters fa", {
)
expect_identical(nrow(predict(mp, keep_na = FALSE)), 2436L)
expect_identical(nrow(predict(mp, newdata = d[1:10, ], keep_na = FALSE)), 10L)
})


test_that("predict factor_analysis", {
model <- factor_analysis(d, n = 5)
expect_identical(nrow(predict(model, keep_na = FALSE)), 2436L)
expect_identical(nrow(predict(mp, newdata = d[1:10, ], keep_na = FALSE)), 10L)
expect_named(
predict(mp, names = c("A", "B", "C", "D", "E"), keep_na = FALSE),
c("A", "B", "C", "D", "E")
)
model <- factor_analysis(d, n = 5)
expect_identical(nrow(predict(model, keep_na = FALSE)), 2436L)
})

unloadNamespace("GPArotation")
