diff --git a/DESCRIPTION b/DESCRIPTION index c2bde86a..d95e2bc4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,8 +2,8 @@ Package: sjstats Type: Package Encoding: UTF-8 Title: Collection of Convenient Functions for Common Statistical Computations -Version: 0.17.1.9000 -Date: 2018-10-05 +Version: 0.17.2 +Date: 2018-11-15 Authors@R: person("Daniel", "Lüdecke", role = c("aut", "cre"), email = "d.luedecke@uke.de", comment = c(ORCID = "0000-0002-8895-3206")) Maintainer: Daniel Lüdecke Description: Collection of convenient functions for common statistical computations, diff --git a/NAMESPACE b/NAMESPACE index 0bda3a21..916c52e3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -20,6 +20,8 @@ S3method(icc,brmsfit) S3method(icc,glmmTMB) S3method(icc,merMod) S3method(icc,stanreg) +S3method(is_singular,glmmTMB) +S3method(is_singular,merMod) S3method(mcse,brmsfit) S3method(mcse,stanmvreg) S3method(mcse,stanreg) @@ -80,6 +82,7 @@ S3method(print,sj_pval) S3method(print,sj_r2) S3method(print,sj_resample) S3method(print,sj_revar) +S3method(print,sj_revar_adjust) S3method(print,sj_rope) S3method(print,sj_se_icc) S3method(print,sj_splithalf) diff --git a/R/S3-methods.R b/R/S3-methods.R index 3951cae9..647efc97 100644 --- a/R/S3-methods.R +++ b/R/S3-methods.R @@ -186,7 +186,7 @@ print.sj_r2 <- function(x, digits = 3, ...) { #' @importFrom purrr map_chr map2_chr #' @export print.sj_icc <- function(x, digits = 4, ...) { - cat("\nIntra-Class Correlation Coefficient for Generalized Linear Mixed Model\n\n") + cat("\nIntraclass Correlation Coefficient for Generalized Linear Mixed Model\n\n") print_icc_and_r2(x, digits, ...) } @@ -216,7 +216,7 @@ print_icc_and_r2 <- function(x, digits, ...) { #' @export print.sj_icc_merMod <- function(x, comp, ...) { # print model information - cat(sprintf("\n%s\n\n", attr(x, "model", exact = T))) + cat(sprintf("\nIntraclass Correlation Coefficient for %s\n\n", attr(x, "model", exact = T))) cat(crayon::blue( sprintf("Family : %s (%s)\nFormula: %s\n\n", @@ -287,6 +287,8 @@ print.sj_icc_merMod <- function(x, comp, ...) { as.vector(x[i]))) } } + + cat("\n") } @@ -1424,6 +1426,30 @@ print.sj_grpmeans <- function(x, ...) { } +#' @importFrom crayon blue cyan +#' @export +print.sj_revar_adjust <- function(x, ...) { + cat("\nVariance Components of Mixed Models\n\n") + cat(crayon::blue(sprintf("Family : %s (%s)\nFormula: %s\n\n", x$family, x$link, deparse(x$formula)))) + + vals <- c( + sprintf("%.3f", x$var.fixef), + sprintf("%.3f", x$var.ranef), + sprintf("%.3f", x$var.disp), + sprintf("%.3f", x$var.dist), + sprintf("%.3f", x$var.resid) + ) + + vals <- format(vals, justify = "right") + + cat(sprintf(" fixed: %s\n", vals[1])) + cat(sprintf(" random: %s\n", vals[2])) + cat(sprintf(" residual: %s\n", vals[5])) + cat(crayon::cyan(sprintf(" dispersion: %s\n", vals[3]))) + cat(crayon::cyan(sprintf(" distribution: %s\n\n", vals[4]))) +} + + #' @export print.sj_revar <- function(x, ...) { # get parameters diff --git a/R/converge_ok.R b/R/converge_ok.R index 5e6b878c..b58b9c9a 100644 --- a/R/converge_ok.R +++ b/R/converge_ok.R @@ -12,6 +12,7 @@ #' @param tolerance Indicates up to which value the convergence result is #' accepted. The smaller \code{tolerance} is, the stricter the test #' will be. +#' @param ... Currently not used. #' #' @return For \code{converge_ok()}, a logical vector, which is \code{TRUE} if #' convergence is fine and \code{FALSE} if convergence is suspicious. @@ -19,16 +20,32 @@ #' \code{is_singluar()} returns \code{TRUE} if the model fit is singular. #' #' @details \code{converge_ok()} provides an alternative convergence test for -#' \code{\link[lme4]{merMod}}-objects, as discussed -#' \href{https://github.com/lme4/lme4/issues/120}{here} -#' and suggested by Ben Bolker in -#' \href{https://github.com/lme4/lme4/issues/120#issuecomment-39920269}{this comment}. -#' \cr \cr -#' \code{is_singular()} checks if a model fit is singular, and can -#' be used in case of post-fitting convergence warnings, such as -#' warnings about negative eigenvalues of the Hessian. If the fit -#' is singular (i.e. \code{is_singular()} returns \code{TRUE}), these -#' warnings can most likely be ignored. +#' \code{\link[lme4]{merMod}}-objects, as discussed +#' \href{https://github.com/lme4/lme4/issues/120}{here} +#' and suggested by Ben Bolker in +#' \href{https://github.com/lme4/lme4/issues/120#issuecomment-39920269}{this comment}. +#' \cr \cr +#' If a model is "singular", this means that some dimensions of the variance-covariance +#' matrix have been estimated as exactly zero. \code{is_singular()} checks if +#' a model fit is singular, and can be used in case of post-fitting convergence +#' warnings, such as warnings about negative eigenvalues of the Hessian. If the fit +#' is singular (i.e. \code{is_singular()} returns \code{TRUE}), these warnings +#' can most likely be ignored. +#' \cr \cr +#' There is no gold-standard about how to deal with singularity and which +#' random-effects specification to choose. Beside using fully Bayesian methods +#' (with informative priors), proposals in a frequentist framework are: +#' \itemize{ +#' \item avoid fitting overly complex models, such that the variance-covariance matrices can be estimated precisely enough (\cite{Matuschek et al. 2017}) +#' \item use some form of model selection to choose a model that balances predictive accuracy and overfitting/type I error (\cite{Bates et al. 2015}, \cite{Matuschek et al. 2017}) +#' \item \dQuote{keep it maximal}, i.e. fit the most complex model consistent with the experimental design, removing only terms required to allow a non-singular fit (\cite{Barr et al. 2013}) +#' } +#' +#' @references \itemize{ +#' \item Bates D, Kliegl R, Vasishth S, Baayen H. Parsimonious Mixed Models. arXiv:1506.04967, June 2015. +#' \item Barr DJ, Levy R, Scheepers C, Tily HJ. Random effects structure for confirmatory hypothesis testing: Keep it maximal. Journal of Memory and Language, 68(3):255–278, April 2013. +#' \item Matuschek H, Kliegl R, Vasishth S, Baayen H, Bates D. Balancing type I error and power in linear mixed models. Journal of Memory and Language, 94:305–315, 2017. +#' } #' #' @examples #' library(sjmisc) @@ -74,23 +91,25 @@ converge_ok <- function(x, tolerance = 0.001) { } -#' @importFrom lme4 getME -#' @importFrom glmmTMB getME #' @rdname converge_ok #' @export -is_singular <- function(x, tolerance = 1e-5) { - if (is_merMod(x)) { - theta <- lme4::getME(x, "theta") - # diagonal elements are identifiable because they are fitted - # with a lower bound of zero ... - diag.element <- lme4::getME(x, "lower") == 0 - any(abs(theta[diag.element]) < tolerance) - } else if (inherits(x, "glmmTMB")) { - theta <- glmmTMB::getME(x, "theta") - # diagonal elements are identifiable because they are fitted - # with a lower bound of zero ... - diag.element <- glmmTMB::getME(x, "lower") == 0 - any(abs(theta[diag.element]) < tolerance) - } else - warning("`x` must be a merMod- or glmmTMB-object.", call. = F) +is_singular <- function(x, tolerance = 1e-5, ...) { + UseMethod("is_singular") +} + +#' @importFrom lme4 getME +#' @export +is_singular.merMod <- function(x, tolerance = 1e-5, ...) { + theta <- lme4::getME(x, "theta") + # diagonal elements are identifiable because they are fitted + # with a lower bound of zero ... + diag.element <- lme4::getME(x, "lower") == 0 + any(abs(theta[diag.element]) < tolerance) +} + +#' @importFrom lme4 VarCorr +#' @export +is_singular.glmmTMB <- function(x, tolerance = 1e-5, ...) { + vc <- collapse_cond(lme4::VarCorr(x)) + any(sapply(vc, function(.x) any(abs(diag(.x)) < tolerance))) } diff --git a/R/icc.R b/R/icc.R index a058d445..9961fe5e 100644 --- a/R/icc.R +++ b/R/icc.R @@ -19,11 +19,9 @@ #' @param ppd Logical, if \code{TRUE}, variance decomposition is based on the #' posterior predictive distribution, which is the correct way for Bayesian #' non-Gaussian models. -#' @param adjusted Logical, if \code{TRUE}, the adjusted (and conditional) ICC -#' is calculated, which reflects the uncertainty of all random effects (see -#' 'Details'). \strong{Note} that if \code{adjusted = TRUE}, \strong{no} -#' additional information on the variance components is returned. -#' +#' @param adjusted Logical, if \code{TRUE}, the adjusted (and +#' conditional) ICC is calculated, which reflects the uncertainty of all +#' random effects (see 'Details'). #' #' @inheritParams hdi #' @@ -103,7 +101,7 @@ #' To get a meaningful ICC also for models with random slopes, use \code{adjusted = TRUE}. #' The adjusted ICC used the mean random effect variance, which is based #' on the random effect variances for each value of the random slope -#' (see \cite{Johnson 2014}). +#' (see \cite{Johnson et al. 2014}). #' \cr \cr #' \strong{ICC for models with multiple or nested random effects} #' \cr \cr @@ -759,15 +757,23 @@ icc.brmsfit <- function(x, re.form = NULL, typical = "mean", prob = .89, ppd = F #' objects are supported. #' #' @param x Fitted mixed effects model (of class \code{merMod}, \code{glmmTMB}, -#' \code{stanreg} or \code{brmsfit}). \code{get_re_var()} also accepts -#' an object of class \code{icc.lme4}, as returned by the -#' \code{\link{icc}} function. +#' \code{stanreg} or \code{brmsfit}). \code{get_re_var()} also accepts +#' an object of class \code{icc.lme4}, as returned by the +#' \code{\link{icc}} function. #' @param comp Name of the variance component to be returned. See 'Details'. +#' @param adjusted Logical, if \code{TRUE}, returns the variance of the fixed +#' and random effects as well as of the additive dispersion and +#' distribution-specific variance, which are used to calculate the +#' adjusted and conditional \code{\link{r2}} and \code{\link{icc}}. #' #' @return \code{get_re_var()} returns the value of the requested variance component, #' \code{re_var()} returns all random effects variances. #' -#' @references Aguinis H, Gottfredson RK, Culpepper SA. 2013. Best-Practice Recommendations for Estimating Cross-Level Interaction Effects Using Multilevel Modeling. Journal of Management 39(6): 1490–1528 (\doi{10.1177/0149206313478188}) +#' @references \itemize{ +#' \item Aguinis H, Gottfredson RK, Culpepper SA. 2013. Best-Practice Recommendations for Estimating Cross-Level Interaction Effects Using Multilevel Modeling. Journal of Management 39(6): 1490–1528 (\doi{10.1177/0149206313478188}) +#' \item Johnson PC, O'Hara RB. 2014. Extension of Nakagawa & Schielzeth's R2GLMM to random slopes models. Methods Ecol Evol, 5: 944-946. (\doi{10.1111/2041-210X.12225}) +#' \item Nakagawa S, Johnson P, Schielzeth H (2017) The coefficient of determination R2 and intra-class correlation coefficient from generalized linear mixed-effects models revisted and expanded. J. R. Soc. Interface 14. \doi{10.1098/rsif.2017.0213} +#' } #' #' @details The random effect variances indicate the between- and within-group #' variances as well as random-slope variance and random-slope-intercept @@ -785,6 +791,18 @@ icc.brmsfit <- function(x, re.form = NULL, typical = "mean", prob = .89, ppd = F #' direct effects) affect the between-group-variance. Cross-level #' interaction effects are group-level factors that explain the #' variance in random slopes (Aguinis et al. 2013). +#' \cr \cr +#' If \code{adjusted = TRUE}, the variance of the fixed and random +#' effects as well as of the additive dispersion and +#' distribution-specific variance are returned (see \cite{Johnson et al. 2014} +#' and \cite{Nakagawa et al. 2017}): +#' \describe{ +#' \item{\code{"fixed"}}{variance attributable to the fixed effects} +#' \item{\code{"random"}}{variance of random effects} +#' \item{\code{"dispersion"}}{variance due to additive dispersion} +#' \item{\code{"distribution"}}{distribution-specific variance} +#' \item{\code{"residual"}}{sum of dispersion and distribution} +#' } #' #' @seealso \code{\link{icc}} #' @@ -794,6 +812,7 @@ icc.brmsfit <- function(x, re.form = NULL, typical = "mean", prob = .89, ppd = F #' #' # all random effect variance components #' re_var(fit1) +#' re_var(fit1, adjusted = TRUE) #' #' # just the rand. slope-intercept covariance #' get_re_var(fit1, "tau.01") @@ -806,20 +825,40 @@ icc.brmsfit <- function(x, re.form = NULL, typical = "mean", prob = .89, ppd = F #' @importFrom purrr map map2 flatten_dbl flatten_chr #' @importFrom sjmisc trim #' @export -re_var <- function(x) { - # iterate all attributes and return them as vector - rv <- c("sigma_2", "tau.00", "tau.11", "tau.01", "rho.01") +re_var <- function(x, adjusted = FALSE) { - # compute icc - icc_ <- suppressMessages(icc(x)) + if (adjusted) { - rv_ <- purrr::map(rv, ~ attr(icc_, .x, exact = TRUE)) - rn <- purrr::map2(1:length(rv_), rv, ~ sjmisc::trim(paste(names(rv_[[.x]]), .y, sep = "_"))) - rv_ <- purrr::flatten_dbl(rv_) + rv <- r2(x) - names(rv_) <- purrr::flatten_chr(rn)[1:length(rv_)] + rv_ <- list( + var.fixef = attr(rv, "var.fixef", exact = TRUE), + var.ranef = attr(rv, "var.ranef", exact = TRUE), + var.disp = attr(rv, "var.disp", exact = TRUE), + var.dist = attr(rv, "var.dist", exact = TRUE), + var.resid = attr(rv, "var.resid", exact = TRUE), + formula = attr(rv, "formula", exact = TRUE), + family = attr(rv, "family", exact = TRUE), + link = attr(rv, "link", exact = TRUE) + ) + + class(rv_) <- c("sj_revar_adjust", class(rv_)) + + } else { + # iterate all attributes and return them as vector + rv <- c("sigma_2", "tau.00", "tau.11", "tau.01", "rho.01") - class(rv_) <- c("sj_revar", class(rv_)) + # compute icc + icc_ <- suppressMessages(icc(x)) + + rv_ <- purrr::map(rv, ~ attr(icc_, .x, exact = TRUE)) + rn <- purrr::map2(1:length(rv_), rv, ~ sjmisc::trim(paste(names(rv_[[.x]]), .y, sep = "_"))) + rv_ <- purrr::flatten_dbl(rv_) + + names(rv_) <- purrr::flatten_chr(rn)[1:length(rv_)] + + class(rv_) <- c("sj_revar", class(rv_)) + } rv_ } diff --git a/R/pred_vars.R b/R/pred_vars.R index facfb45f..3a54dc8a 100644 --- a/R/pred_vars.R +++ b/R/pred_vars.R @@ -3,7 +3,7 @@ #' #' @description Several functions to retrieve information from model objects, #' like variable names, link-inverse function, model frame, -#' model_family etc., in a tidy and consistent way. +#' model family etc., in a tidy and consistent way. #' #' @param x A fitted model; for \code{var_names()}, \code{x} may also be a #' character vector. diff --git a/R/r2.R b/R/r2.R index 06d09a28..2dca5c12 100644 --- a/R/r2.R +++ b/R/r2.R @@ -506,18 +506,17 @@ r2_mixedmodel <- function(x, type = NULL) { } - # Test for non-zero random effects + # Test for non-zero random effects ((near) singularity) - if (any(sapply(vals$vc, function(x) any(diag(x) == 0)))) { - ## TODO test more generally for singularity, via theta? - warning("Some variance components equal zero.\n Solution: Respecify random structure!", call. = F) + if (is_singular(x)) { + warning(sprintf("Can't compute %s. Some variance components equal zero.\n Solution: Respecify random structure!", ws2), call. = F) return(NULL) } # Get variance of fixed effects: multiply coefs by design matrix - varF <- with(vals, stats::var(as.vector(beta %*% t(X)))) + var.fixef <- get_fixef_variance(vals) # Are random slopes present as fixed effects? Warn. @@ -532,7 +531,7 @@ r2_mixedmodel <- function(x, type = NULL) { } if (!all(random.slopes %in% names(vals$beta))) - warning(sprintf("Random slopes not present as fixed effects. This artificially inflates the conditional %s.\n Solution: Respecify fixed structure!", ws2), call. = FALSE) + warning(sprintf("Random slopes not present as fixed effects. This artificially inflates the conditional %s.\n Solution: Respecify fixed structure!", ws2), call. = FALSE) # Separate observation variance from variance of random effects @@ -543,23 +542,21 @@ r2_mixedmodel <- function(x, type = NULL) { # Variance of random effects - varRand <- get_ranef_variance(not.obs.terms, x = x, vals = vals) + var.ranef <- get_ranef_variance(not.obs.terms, x = x, vals = vals) # Residual variance, which is defined as the variance due to - # additive dispersion and the distribution‐specific variance (Johnson 2015) - varDist <- get_residual_variance(x, var.cor = vals$vc, faminfo, type = ws2) + # additive dispersion and the distribution-specific variance (Johnson et al. 2014) - if (faminfo$is_linear) { - varDisp <- 0 - } else { - varDisp <- if (length(obs.terms) == 0 ) 0 else get_ranef_variance(obs.terms, x = x, vals = vals) - } + var.dist <- get_residual_variance(x, var.cor = vals$vc, faminfo, type = ws2) + var.disp <- get_disp_variance(x = x, vals = vals, faminfo = faminfo, obs.terms = obs.terms) + + var.resid <- var.dist + var.disp # Calculate R2 values - rsq.marginal <- varF / (varF + varRand + varDisp + varDist) - rsq.conditional <- (varF + varRand) / (varF + varRand + varDisp + varDist) + rsq.marginal <- var.fixef / (var.fixef + var.ranef + var.resid) + rsq.conditional <- (var.fixef + var.ranef) / (var.fixef + var.ranef + var.resid) names(rsq.marginal) <- "Marginal R2" names(rsq.conditional) <- "Conditional R2" @@ -567,8 +564,8 @@ r2_mixedmodel <- function(x, type = NULL) { # Calculate ICC values - icc.adjusted <- varRand / (varRand + varDisp + varDist) - icc.conditional <- varRand / (varF + varRand + varDisp + varDist) + icc.adjusted <- var.ranef / (var.ranef + var.resid) + icc.conditional <- var.ranef / (var.fixef + var.ranef + var.resid) names(icc.adjusted) <- "Adjusted ICC" names(icc.conditional) <- "Conditional ICC" @@ -595,6 +592,14 @@ r2_mixedmodel <- function(x, type = NULL) { } + # save variance information + + attr(var.measure, "var.fixef") <- var.fixef + attr(var.measure, "var.ranef") <- var.ranef + attr(var.measure, "var.disp") <- var.disp + attr(var.measure, "var.dist") <- var.dist + attr(var.measure, "var.resid") <- var.resid + attr(var.measure, "family") <- faminfo$family attr(var.measure, "link") <- faminfo$link.fun attr(var.measure, "formula") <- stats::formula(x) @@ -603,7 +608,7 @@ r2_mixedmodel <- function(x, type = NULL) { } -#' @importFrom stats nobs +#' @importFrom stats nobs logLik r2glm <- function(x, L.base) { L.full <- stats::logLik(x) D.full <- -2 * L.full diff --git a/R/r2_helper.R b/R/r2_helper.R index 991203db..a97e5475 100644 --- a/R/r2_helper.R +++ b/R/r2_helper.R @@ -23,7 +23,15 @@ get_ranef_variance <- function(terms, x, vals) { } -# Get residual variance from random effects +# get fixed effects variance + +#' @importFrom stats var +get_fixef_variance <- function(vals) { + with(vals, stats::var(as.vector(beta %*% t(X)))) +} + + +# Get residual (distribution specific) variance from random effects get_residual_variance <- function(x, var.cor, faminfo, type) { @@ -61,6 +69,20 @@ get_residual_variance <- function(x, var.cor, faminfo, type) { } +# get dispersion-specific variance + +get_disp_variance <- function(x, vals, faminfo, obs.terms) { + if (faminfo$is_linear) { + 0 + } else { + if (length(obs.terms) == 0 ) + 0 + else + get_ranef_variance(obs.terms, x = x, vals = vals) + } +} + + # helper-function, telling user if model is supported or not badlink <- function(link, family) { diff --git a/README.md b/README.md index d1f3a2e0..f7fb58f5 100644 --- a/README.md +++ b/README.md @@ -41,8 +41,8 @@ Please visit [https://strengejacke.github.io/sjstats/](https://strengejacke.gith To install the latest development snapshot (see latest changes below), type following commands into the R console: ```r -library(devtools) -devtools::install_github("strengejacke/sjstats") +library(githubinstall) +githubinstall::githubinstall("sjstats") ``` Please note the package dependencies when installing from GitHub. The GitHub version of this package may depend on latest GitHub versions of my other packages, so you may need to install those first, if you encounter any problems. Here's the order for installing packages from GitHub: diff --git a/docs/articles/anova-statistics.html b/docs/articles/anova-statistics.html index f7441569..f88a3166 100644 --- a/docs/articles/anova-statistics.html +++ b/docs/articles/anova-statistics.html @@ -30,7 +30,7 @@ sjstats - 0.17.1 + 0.17.2 @@ -89,7 +89,7 @@

Statistics for Anova Tables

Daniel Lüdecke

-

2018-10-02

+

2018-11-15

Source: vignettes/anova-statistics.Rmd @@ -180,19 +180,19 @@

Complete Statistical Table Output

The anova_stats() function takes a model input and computes a comprehensive summary, including the above effect size measures, returned as tidy data frame:

anova_stats(fit)
-#>        term power  df      sumsq     meansq statistic p.value etasq partial.etasq omegasq partial.omegasq cohens.f
-#> 1    e42dep  1.00   3  577756.33 192585.444   108.786   0.000 0.266         0.281   0.263           0.278    0.626
-#> 2  c172code  0.63   2   11722.05   5861.024     3.311   0.037 0.005         0.008   0.004           0.005    0.089
-#> 3   c160age  1.00   1  105169.60 105169.595    59.408   0.000 0.048         0.066   0.048           0.065    0.267
-#> 4 Residuals    NA 834 1476436.34   1770.307        NA      NA    NA            NA      NA              NA       NA
+#> term df sumsq meansq statistic p.value etasq partial.etasq omegasq partial.omegasq cohens.f power +#> 1 e42dep 3 577756.33 192585.444 108.786 0.000 0.266 0.281 0.263 0.278 0.626 1.00 +#> 2 c172code 2 11722.05 5861.024 3.311 0.037 0.005 0.008 0.004 0.005 0.089 0.63 +#> 3 c160age 1 105169.60 105169.595 59.408 0.000 0.048 0.066 0.048 0.065 0.267 1.00 +#> 4 Residuals 834 1476436.34 1770.307 NA NA NA NA NA NA NA NA

Like the other functions, the input may also be an object of class anova, so you can also use model fits from the car package, which allows fitting Anova’s with different types of sum of squares:

anova_stats(car::Anova(fit, type = 3))
-#>          term power       sumsq     meansq  df statistic p.value etasq partial.etasq omegasq partial.omegasq cohens.f
-#> 1 (Intercept) 0.973   26851.070  26851.070   1    15.167   0.000 0.013         0.018   0.012           0.017    0.135
-#> 2      e42dep 1.000  426461.571 142153.857   3    80.299   0.000 0.209         0.224   0.206           0.220    0.537
-#> 3    c172code 0.429    7352.049   3676.025   2     2.076   0.126 0.004         0.005   0.002           0.003    0.071
-#> 4     c160age 1.000  105169.595 105169.595   1    59.408   0.000 0.051         0.066   0.051           0.065    0.267
-#> 5   Residuals    NA 1476436.343   1770.307 834        NA      NA    NA            NA      NA              NA       NA
+#> term sumsq meansq df statistic p.value etasq partial.etasq omegasq partial.omegasq cohens.f power +#> 1 (Intercept) 26851.070 26851.070 1 15.167 0.000 0.013 0.018 0.012 0.017 0.135 0.973 +#> 2 e42dep 426461.571 142153.857 3 80.299 0.000 0.209 0.224 0.206 0.220 0.537 1.000 +#> 3 c172code 7352.049 3676.025 2 2.076 0.126 0.004 0.005 0.002 0.003 0.071 0.429 +#> 4 c160age 105169.595 105169.595 1 59.408 0.000 0.051 0.066 0.051 0.065 0.267 1.000 +#> 5 Residuals 1476436.343 1770.307 834 NA NA NA NA NA NA NA NA

@@ -208,9 +208,9 @@

# uses bootstrapping - here, for speed, just 100 samples omega_sq(fit, partial = TRUE, ci.lvl = .9, n = 100) #> term partial.omegasq conf.low conf.high -#> 1 e42dep 0.278 0.232 0.336 -#> 2 c172code 0.005 -0.005 0.022 -#> 3 c160age 0.065 0.034 0.100

+#> 1 e42dep 0.278 0.225 0.334 +#> 2 c172code 0.005 -0.005 0.023 +#> 3 c160age 0.065 0.033 0.100

diff --git a/docs/articles/bayesian-statistics.html b/docs/articles/bayesian-statistics.html index dacf1110..1f83fe71 100644 --- a/docs/articles/bayesian-statistics.html +++ b/docs/articles/bayesian-statistics.html @@ -30,7 +30,7 @@ sjstats - 0.17.1 + 0.17.2

@@ -89,7 +89,7 @@

Statistics for Bayesian Models

Daniel Lüdecke

-

2018-10-02

+

2018-11-15

Source: vignettes/bayesian-statistics.Rmd @@ -125,8 +125,8 @@

2018-10-02

# load sample data data(jobs) data(efc) -efc <- to_factor(efc, e42dep, c172code, c161sex, e15relat) -zinb <- read.csv("http://stats.idre.ucla.edu/stat/data/fish.csv") +data(fish) +efc <- to_factor(efc, e42dep, c172code, c161sex, e15relat) # linear models, for mediation analysis b1 <- lm(job_seek ~ treat + econ_hard + sex + age, data = jobs) @@ -144,7 +144,7 @@

2018-10-02

m3 <- brm( bf(count ~ child + camper + (1 | persons), zi ~ child + camper + (1 | persons)), - data = zinb, + data = fish, family = zero_inflated_poisson(), cores = 4 ) @@ -170,18 +170,18 @@

#> # Highest Density Interval #> #> HDI(90%) -#> b_jobseek_Intercept [ 3.45 3.87] -#> b_depress2_Intercept [ 1.97 2.45] +#> b_jobseek_Intercept [ 3.47 3.87] +#> b_depress2_Intercept [ 1.95 2.44] #> b_jobseek_treat [-0.02 0.15] -#> b_jobseek_econ_hard [ 0.01 0.09] -#> b_jobseek_sex [-0.09 0.08] +#> b_jobseek_econ_hard [ 0.02 0.10] +#> b_jobseek_sex [-0.09 0.07] #> b_jobseek_age [ 0.00 0.01] #> b_depress2_treat [-0.11 0.03] -#> b_depress2_job_seek [-0.29 -0.20] -#> b_depress2_econ_hard [ 0.12 0.18] +#> b_depress2_job_seek [-0.29 -0.19] +#> b_depress2_econ_hard [ 0.11 0.18] #> b_depress2_sex [ 0.04 0.17] #> b_depress2_age [-0.00 0.00] -#> sigma_jobseek [ 0.70 0.75] +#> sigma_jobseek [ 0.70 0.76] #> sigma_depress2 [ 0.59 0.64] hdi(m2, prob = c(.5, .89)) @@ -189,18 +189,18 @@

#> # Highest Density Interval #> #> HDI(50%) HDI(89%) -#> b_jobseek_Intercept [ 3.58 3.75] [ 3.48 3.88] -#> b_depress2_Intercept [ 2.10 2.30] [ 1.97 2.44] -#> b_jobseek_treat [ 0.04 0.11] [-0.02 0.15] -#> b_jobseek_econ_hard [ 0.04 0.07] [ 0.02 0.09] -#> b_jobseek_sex [-0.04 0.03] [-0.09 0.07] +#> b_jobseek_Intercept [ 3.60 3.76] [ 3.47 3.86] +#> b_depress2_Intercept [ 2.11 2.31] [ 1.97 2.45] +#> b_jobseek_treat [ 0.04 0.11] [-0.01 0.15] +#> b_jobseek_econ_hard [ 0.04 0.07] [ 0.01 0.09] +#> b_jobseek_sex [-0.04 0.03] [-0.09 0.06] #> b_jobseek_age [ 0.00 0.01] [ 0.00 0.01] -#> b_depress2_treat [-0.07 -0.01] [-0.12 0.02] -#> b_depress2_job_seek [-0.26 -0.22] [-0.28 -0.20] -#> b_depress2_econ_hard [ 0.14 0.16] [ 0.12 0.18] -#> b_depress2_sex [ 0.08 0.14] [ 0.04 0.17] +#> b_depress2_treat [-0.07 -0.01] [-0.11 0.03] +#> b_depress2_job_seek [-0.26 -0.22] [-0.28 -0.19] +#> b_depress2_econ_hard [ 0.13 0.16] [ 0.11 0.18] +#> b_depress2_sex [ 0.08 0.13] [ 0.04 0.17] #> b_depress2_age [-0.00 0.00] [-0.00 0.00] -#> sigma_jobseek [ 0.71 0.73] [ 0.70 0.75] +#> sigma_jobseek [ 0.71 0.74] [ 0.70 0.75] #> sigma_depress2 [ 0.60 0.62] [ 0.59 0.64]

For multilevel models, the type-argument defines whether the HDI of fixed, random or all effects are shown.

+#> r_e15relat.1.Intercept. [-0.10 1.41] +#> r_e15relat.2.Intercept. [-0.21 1.02] +#> r_e15relat.3.Intercept. [-0.88 0.76] +#> r_e15relat.4.Intercept. [-0.59 0.85] +#> r_e15relat.5.Intercept. [-0.89 0.81] +#> r_e15relat.6.Intercept. [-1.72 0.28] +#> r_e15relat.7.Intercept. [-1.13 0.91] +#> r_e15relat.8.Intercept. [-0.88 0.50]

The computation for the HDI is based on the code from Kruschke 2015, pp. 727f. For default sampling in Stan (4000 samples), the 90% intervals for HDI are more stable than, for instance, 95% intervals. An effective sample size of at least 10.000 is recommended if 95% intervals should be computed (see Kruschke 2015, p. 183ff).

rope() does not suggest limits for the region of practical equivalence and does not tell you how big is practically equivalent to the null value. However, there are suggestions how to choose reasonable limits (see Kruschke 2018), which are implemented in the equi_test() functions.

@@ -253,14 +253,14 @@

#> Samples: 4000 #> #> H0 %inROPE HDI(95%) -#> b_Intercept (*) reject 0.00 [ 7.59 9.90] -#> b_e42dep2 (*) undecided 9.10 [ 0.07 2.12] -#> b_e42dep3 (*) reject 0.00 [ 1.30 3.28] -#> b_e42dep4 (*) reject 0.00 [ 2.77 4.91] +#> b_Intercept (*) reject 0.00 [ 7.35 9.81] +#> b_e42dep2 (*) undecided 7.70 [ 0.06 2.08] +#> b_e42dep3 (*) reject 0.00 [ 1.32 3.26] +#> b_e42dep4 (*) reject 0.00 [ 2.82 4.93] #> b_c12hour accept 100.00 [ 0.00 0.01] -#> b_c172code2 undecided 72.80 [-0.43 0.77] -#> b_c172code3 undecided 22.02 [-0.08 1.45] -#> sigma reject 0.00 [ 3.41 3.75] +#> b_c172code2 (*) undecided 72.42 [-0.44 0.76] +#> b_c172code3 (*) undecided 21.93 [-0.10 1.43] +#> sigma reject 0.00 [ 3.41 3.76] #> #> (*) the number of effective samples may be insufficient for some parameters

For models with binary outcome, there is no concrete way to derive the effect size that defines the ROPE limits. Two examples from Kruschke suggest that a negligible change is about .05 on the logit-scale. In these cases, it is recommended to specify the rope argument, however, if not specified, the ROPE limits are calculated in this way: 0 +/- .1 * sd(intercept) / 4. For all other models, 0 +/- .1 * sd(intercept) is used to determine the ROPE limits. These formulas are based on experience that worked well in real-life situations, but are most likely not generally the best approach.

@@ -285,16 +285,16 @@

#> ## Conditional Model: #> #> estimate std.error HDI(89%) ratio rhat mcse -#> Intercept 1.22 0.78 [-0.32 2.65] 0.13 1.01 0.04 -#> child -1.15 0.09 [-1.31 -1.01] 0.91 1.00 0.00 -#> camper 0.73 0.09 [ 0.59 0.89] 0.98 1.00 0.00 +#> Intercept 1.22 0.72 [-0.20 2.55] 0.22 1 0.03 +#> child -1.15 0.09 [-1.30 -1.01] 0.99 1 0.00 +#> camper 0.73 0.09 [ 0.58 0.89] 0.86 1 0.00 #> #> ## Zero-Inflated Model: #> #> estimate std.error HDI(89%) ratio rhat mcse -#> Intercept -0.70 0.70 [-1.96 0.57] 0.37 1.01 0.02 -#> child 1.88 0.33 [ 1.37 2.43] 0.71 1.00 0.01 -#> camper -0.83 0.36 [-1.41 -0.28] 0.62 1.00 0.01 +#> Intercept -0.66 0.70 [-1.93 0.59] 0.41 1 0.02 +#> child 1.89 0.33 [ 1.30 2.38] 0.81 1 0.01 +#> camper -0.84 0.38 [-1.45 -0.26] 0.59 1 0.01

Additional statistics in the output are: