From d5b66148093f192528ff08e01a8fb798b451d6cb Mon Sep 17 00:00:00 2001 From: tzoltak Date: Fri, 9 Oct 2020 22:48:00 +0200 Subject: [PATCH 1/2] better handling of svyglm objects and factor predictors --- DESCRIPTION | 12 ++-- NAMESPACE | 1 + NEWS.md | 6 ++ R/coef.R | 4 ++ R/dydx.R | 93 +++++++++++++++++-------------- R/find_terms_in_model.R | 26 +++++---- R/marginal_effects_clm.R | 26 +++++---- R/marginal_effects_default.R | 24 ++++---- R/marginal_effects_glm.R | 28 +++++----- R/marginal_effects_merMod.R | 18 +++--- R/marginal_effects_multinom.R | 24 ++++---- R/marginal_effects_nnet.R | 24 ++++---- R/marginal_effects_polr.R | 24 ++++---- R/margins.R | 37 ++++++------ R/margins_svyglm.R | 44 +++++++-------- man/margins.Rd | 5 +- tests/testthat/tests-core.R | 23 ++++---- tests/testthat/tests-edge-cases.R | 12 ++++ tests/testthat/tests-methods.R | 78 +++++++++++++++++++------- 19 files changed, 298 insertions(+), 211 deletions(-) create mode 100644 R/coef.R diff --git a/DESCRIPTION b/DESCRIPTION index 3961a1a..b13fe25 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -4,8 +4,8 @@ Title: Marginal Effects for Model Objects Description: An R port of Stata's 'margins' command, which can be used to calculate marginal (or partial) effects from model objects. License: MIT + file LICENSE -Version: 0.3.25 -Date: 2018-07-31 +Version: 0.3.26 +Date: 2018-09-10 Authors@R: c(person("Thomas J.", "Leeper", role = c("aut", "cre"), email = "thosjleeper@gmail.com", @@ -18,7 +18,11 @@ Authors@R: c(person("Thomas J.", "Leeper", person("Jacob A.", "Long", role = c("ctb"), email = "long.1377@osu.edu", - comment = c(ORCID = "0000-0002-1582-6214")) + comment = c(ORCID = "0000-0002-1582-6214"), + person("Tomasz", ""\u017b\u00F3\u0142tak"", + role = c("ctb"), + email = "tomek@zozlak.org", + comment = c(ORCID = "0000-0003-1354-4472")) ) URL: https://github.com/leeper/margins BugReports: https://github.com/leeper/margins/issues @@ -48,4 +52,4 @@ Enhances: survey ByteCompile: true VignetteBuilder: knitr -RoxygenNote: 7.0.2 +RoxygenNote: 7.1.1 diff --git a/NAMESPACE b/NAMESPACE index 87765d7..ded66b1 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2: do not edit by hand +S3method(coef,margins) S3method(confint,margins) S3method(cplot,clm) S3method(cplot,default) diff --git a/NEWS.md b/NEWS.md index 9e10322..c107aeb 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,9 @@ +## margins 0.3.26 + +* More Straightforward handling of "svyglm" objects from package **survey** (remove the `design` argument from `margins.svyglm()`). #124 +* Better handling of factor predictors. #116, #121, #125 +* Added `coef()` method for "margins" class. #131 + ## margins 0.3.25 * Setup a `cplot.default()` method and modified documentation of `cplot()`, `image()`, and `persp()` methods slightly. (#84, h/t Luke Sonnet) diff --git a/R/coef.R b/R/coef.R new file mode 100644 index 0000000..61b1874 --- /dev/null +++ b/R/coef.R @@ -0,0 +1,4 @@ +#' @export +coef.margins <- function(object, ...) { + summary(object)$AME +} diff --git a/R/dydx.R b/R/dydx.R index 279adc7..043cfda 100644 --- a/R/dydx.R +++ b/R/dydx.R @@ -12,32 +12,32 @@ #' @param \dots Ignored. #' @details #' These functions provide a simple interface to the calculation of marginal effects for specific variables used in a model, and are the workhorse functions called internally by \code{\link{marginal_effects}}. -#' +#' #' \code{dydx} is an S3 generic with classes implemented for specific variable types. 
S3 method dispatch, somewhat atypically, is based upon the class of \code{data[[variable]]}. -#' +#' #' For numeric (and integer) variables, the method calculates an instantaneous marginal effect using a simple \dQuote{central difference} numerical differentiation: #' \deqn{\frac{f(x + \frac{1}{2}h) - f(x - \frac{1}{2}h)}{dh}}{(f(x + 0.5h) - f(x - 0.5h))/(2h)}, where (\eqn{h = \max(|x|, 1) \sqrt{\epsilon}}{h = max(|x|, 1)sqrt(epsilon)} and the value of \eqn{\epsilon}{epsilon} is given by argument \code{eps}. This procedure is subject to change in the future. -#' +#' #' For factor variables (or character variables, which are implicitly coerced to factors by modelling functions), discrete first-differences in predicted outcomes are reported instead (i.e., change in predicted outcome when factor is set to a given level minus the predicted outcome when the factor is set to its baseline level). These are sometimes called \dQuote{partial effects}. If you want to use numerical differentiation for factor variables (which you probably do not want to do), enter them into the original modelling function as numeric values rather than factors. -#' +#' #' For ordered factor variables, the same approach as factors is used. This may contradict the output of modelling function summaries, which rely on \code{options("contrasts")} to determine the contrasts to use (the default being \code{\link[stats]{contr.poly}} rather than \code{\link[stats]{contr.treatment}}, the latter being used normally for unordered factors). -#' +#' #' For logical variables, the same approach as factors is used, but always moving from \code{FALSE} to \code{TRUE}. -#' +#' #' @return A data frame, typically with one column unless the variable is a factor with more than two levels. The names of the marginal effect columns begin with \dQuote{dydx_} to distinguish them from the substantive variables of the same names. #' @references #' Miranda, Mario J. and Paul L. Fackler. 2002. \emph{Applied Computational Economics and Finance}. p. 103. -#' +#' #' Greene, William H. 2012. \emph{Econometric Analysis}. 7th edition. pp. 733--741. -#' +#' #' Cameron, A. Colin and Pravin K. Trivedi. 2010. \emph{Microeconometric Using Stata}. Revised edition. pp. 106--108, 343--356, 476--478. -#' +#' #' @examples #' require("datasets") #' x <- lm(mpg ~ cyl * hp + wt, data = head(mtcars)) #' # marginal effect (numerical derivative) #' dydx(head(mtcars), x, "hp") -#' +#' #' # other discrete differences #' ## change from min(mtcars$hp) to max(mtcars$hp) #' dydx(head(mtcars), x, "hp", change = "minmax") @@ -47,12 +47,12 @@ #' dydx(head(mtcars), x, "hp", change = "sd") #' ## change between arbitrary values of mtcars$hp #' dydx(head(mtcars), x, "hp", change = c(75,150)) -#' +#' #' # factor variables #' mtcars[["cyl"]] <- factor(mtcars$cyl) #' x <- lm(mpg ~ cyl, data = head(mtcars)) #' dydx(head(mtcars), x, "cyl") -#' +#' #' @seealso \code{\link{marginal_effects}}, \code{\link{margins}} #' @importFrom prediction prediction #' @importFrom data.table rbindlist @@ -63,16 +63,16 @@ dydx <- function(data, model, variable, ...) { #' @rdname dydx #' @export -dydx.default <- -function(data, - model, - variable, - type = c("response", "link"), +dydx.default <- +function(data, + model, + variable, + type = c("response", "link"), change = c("dydx", "minmax", "iqr", "sd"), eps = 1e-7, as.data.frame = TRUE, ...) 
{ - + if (is.numeric(change)) { stopifnot(length(change) == 2) lwr <- change[1] @@ -86,14 +86,14 @@ function(data, warning(paste0("Class of variable, ", variable, ", is unrecognized. Returning NA.")) return(rep(NA_real_, nrow(data))) } - + d0 <- d1 <- data - + # set value of `h` based on `eps` to deal with machine precision setstep <- function(x) { x + (max(abs(x), 1, na.rm = TRUE) * sqrt(eps)) - x } - + if (change == "dydx") { # calculate numerical derivative d0[[variable]] <- d0[[variable]] - setstep(d0[[variable]]) @@ -118,26 +118,26 @@ function(data, d0[[variable]] <- lwr d1[[variable]] <- upr } - + if (!is.null(type)) { type <- type[1L] pred <- prediction(model = model, data = data.table::rbindlist(list(d0, d1)), type = type, calculate_se = FALSE, ...)[["fitted"]] } else { pred <- prediction(model = model, data = data.table::rbindlist(list(d0, d1)), calculate_se = FALSE, ...)[["fitted"]] } - + if (change == "dydx") { out <- (pred[nrow(d0) + seq_len(nrow(d0))] - pred[seq_len(nrow(d0))]) / (d1[[variable]] - d0[[variable]]) } else { out <- (pred[nrow(d0) + seq_len(nrow(d0))] - pred[seq_len(nrow(d0))]) } class(out) <- c("marginaleffect", "numeric") - + # return data.frame (or matrix) with column of derivatives if (isTRUE(as.data.frame)) { return(structure(list(out), names = paste0("dydx_",variable), - class = c("data.frame"), + class = c("data.frame"), row.names = seq_len(nrow(data)))) } else { return(structure(matrix(out, ncol = 1L), dimnames = list(seq_len(length(out)), paste0("dydx_",variable)))) @@ -146,7 +146,7 @@ function(data, #' @rdname dydx #' @export -dydx.factor <- +dydx.factor <- function(data, model, variable, @@ -155,27 +155,36 @@ function(data, as.data.frame = TRUE, ... ) { - - levs <- levels(as.factor(data[[variable]])) - base <- levs[1L] - levs <- levs[-1L] - + i <- 1 + levs = try(find_data(model, parent.frame(i))[[variable]], silent = TRUE) + while (inherits(levs, "try-error")) { + i <- i + 1 + levs = try(find_data(model, parent.frame(i))[[variable]], silent = TRUE) + } + levs <- sort(unique(levs)) + if (variable %in% names(attributes(data)$at)) { + base <- factor(attributes(data)$at[[variable]], levs) + } else { + base <- levs[1L] + } + levs <- levs[levs != base] + # setup response object if (isTRUE(fwrap)) { outcolnames <- paste0("factor(", variable, ")", levs) } else { outcolnames <- paste0("dydx_", variable, levs) } - + if (isTRUE(as.data.frame)) { - out <- structure(rep(list(list()), length(levs)), - class = "data.frame", - names = outcolnames, + out <- structure(rep(list(list()), length(levs)), + class = "data.frame", + names = outcolnames, row.names = seq_len(nrow(data))) } else { out <- matrix(NA_real_, nrow = nrow(data), ncol = length(levs), dimnames = list(seq_len(nrow(data)), outcolnames)) } - + # setup base data and prediction d0 <- d1 <- data d0[[variable]] <- base @@ -200,7 +209,7 @@ function(data, out[, outcolnames[i]] <- pred1 - pred0 } } - + # return data.frame (or matrix) with column of derivatives return(out) } @@ -211,7 +220,7 @@ dydx.ordered <- dydx.factor #' @rdname dydx #' @export -dydx.logical <- +dydx.logical <- function(data, model, variable, @@ -219,12 +228,12 @@ function(data, as.data.frame = TRUE, ... 
) { - + # setup base data and prediction d0 <- d1 <- data d0[[variable]] <- FALSE d1[[variable]] <- TRUE - + # calculate difference for moving FALSE to TRUE if (!is.null(type)) { type <- type[1L] @@ -233,13 +242,13 @@ function(data, pred <- prediction(model = model, data = data.table::rbindlist(list(d0, d1)), calculate_se = FALSE, ...)[["fitted"]] } out <- structure(pred[nrow(d0) + seq_len(nrow(d0))] - pred[seq_len(nrow(d0))], class = c("marginaleffect", "numeric")) - + # return data.frame (or matrix) with column of derivatives class(out) <- c("marginaleffect", "numeric") if (isTRUE(as.data.frame)) { return(structure(list(out), names = paste0("dydx_",variable), - class = c("data.frame"), + class = c("data.frame"), row.names = seq_len(nrow(data)))) } else { return(structure(matrix(out, ncol = 1L), dimnames = list(seq_len(length(out)), paste0("dydx_",variable)))) diff --git a/R/find_terms_in_model.R b/R/find_terms_in_model.R index 110f309..e46f05d 100644 --- a/R/find_terms_in_model.R +++ b/R/find_terms_in_model.R @@ -1,14 +1,15 @@ # function to identify terms in model formula -## @return A three-element list containing: +## @return A four-element list containing: ## - `nnames` (numeric variables) ## - `lnames` (logical variables) ## - `fnames` (factor variables) +## - `all` all variables in the order they appear in a model formula find_terms_in_model <- function(model, variables = NULL) { UseMethod("find_terms_in_model") } find_terms_in_model.default <- function(model, variables = NULL) { - + # identify classes of terms in `model` if (!is.null(attributes(terms(model))[["dataClasses"]])) { ## first look in the `terms(model)` @@ -29,15 +30,15 @@ find_terms_in_model.default <- function(model, variables = NULL) { stop("No variable classes found in model.") } } - + # drop specially named "(weights)" variables classes <- classes[!names(classes) %in% "(weights)"] - + # handle character variables as factors classes[classes == "character"] <- "factor" ## cleanup names of terms names(classes) <- clean_terms(names(classes)) - + # drop instruments, if applicable if (inherits(model, "ivreg")) { regressors <- clean_terms(attr(model$terms$regressors, "term.labels")) @@ -45,14 +46,15 @@ find_terms_in_model.default <- function(model, variables = NULL) { instruments <- instruments[!instruments %in% regressors] classes <- classes[!names(classes) %in% instruments] } - + # identify factors versus numeric terms in `model`, and examine only unique terms vars <- list( nnames = unique(names(classes)[!classes %in% c("factor", "ordered", "logical")]), lnames = unique(names(classes)[classes == "logical"]), - fnames = unique(names(classes)[classes %in% c("factor", "ordered")]) + fnames = unique(names(classes)[classes %in% c("factor", "ordered")]), + all = unique(names(classes)) ) - + # subset of variables for which to compute the marginal effects if (!is.null(variables)) { tmp <- c(vars$nnames, vars$lnames, vars$fnames) @@ -63,7 +65,7 @@ find_terms_in_model.default <- function(model, variables = NULL) { vars$lnames <- vars$lnames[vars$lnames %in% variables] vars$fnames <- vars$fnames[vars$fnames %in% variables] } - + # check whether the list is completely NULL if (is.null(unlist(vars))) { stop("No variables found in model.") @@ -72,13 +74,13 @@ find_terms_in_model.default <- function(model, variables = NULL) { } find_terms_in_model.merMod <- function(model, variables = NULL) { - + # require lme4 package in order to identify random effects terms requireNamespace("lme4") - + # call default method varslist <- 
find_terms_in_model.default(model, variables) - + # sanitize `varlist` to remove random effects terms fixed <- all.vars(formula(model, fixed.only = TRUE))[-1L] varslist[] <- lapply(varslist, function(vartype) vartype[vartype %in% fixed]) diff --git a/R/marginal_effects_clm.R b/R/marginal_effects_clm.R index 2e75d3e..91d2eb2 100644 --- a/R/marginal_effects_clm.R +++ b/R/marginal_effects_clm.R @@ -1,37 +1,39 @@ #' @rdname marginal_effects #' @importFrom prediction find_data #' @export -marginal_effects.clm <- -function(model, - data = find_data(model, parent.frame()), +marginal_effects.clm <- +function(model, + data = find_data(model, parent.frame()), variables = NULL, type = NULL, - eps = 1e-7, + eps = 1e-7, varslist = NULL, as.data.frame = TRUE, ...) { - + if (!is.null(type)) { warning(sprintf("'type' is ignored for models of class '%s'", class(model))) } - + # identify classes of terms in `model` if (is.null(varslist)) { varslist <- find_terms_in_model(model, variables = variables) } - + # estimate numerical derivatives with respect to each variable (for numeric terms in the model) # add discrete differences for logical terms - out1 <- lapply(c(varslist$nnames, varslist$lnames), dydx, data = data, model = model, type = NULL, eps = eps, as.data.frame = as.data.frame, ...) - + out1 <- lapply(setNames(c(varslist$nnames, varslist$lnames), + c(varslist$nnames, varslist$lnames)), + dydx, data = data, model = model, type = NULL, eps = eps, as.data.frame = as.data.frame, ...) + # add discrete differences for factor terms ## exact number depends on number of factor levels - out2 <- list() + out2 <- setNames(vector(mode = "list", length = length(varslist$fnames)), varslist$fnames) for (i in seq_along(varslist$fnames)) { out2[[i]] <- dydx.factor(data = data, model = model, varslist$fnames[i], fwrap = FALSE, type = NULL, as.data.frame = as.data.frame, ...) } - - out <- c(out1, out2) + + out <- unname(c(out1, out2)[varslist$all]) if (isTRUE(as.data.frame)) { out <- do.call("cbind.data.frame", out[vapply(out, function(x) length(x) > 0, FUN.VALUE = logical(1))]) } else { diff --git a/R/marginal_effects_default.R b/R/marginal_effects_default.R index 8ddbf2f..7762374 100644 --- a/R/marginal_effects_default.R +++ b/R/marginal_effects_default.R @@ -1,35 +1,37 @@ #' @rdname marginal_effects #' @importFrom prediction find_data #' @export -marginal_effects.default <- -function(model, - data = find_data(model, parent.frame()), +marginal_effects.default <- +function(model, + data = find_data(model, parent.frame()), variables = NULL, type = c("response", "link"), eps = 1e-7, as.data.frame = TRUE, varslist = NULL, ...) { - + type <- type[1L] - + # identify classes of terms in `model` if (is.null(varslist)) { varslist <- find_terms_in_model(model, variables = variables) } - + # estimate numerical derivatives with respect to each variable (for numeric terms in the model) # add discrete differences for logical terms - out1 <- lapply(c(varslist$nnames, varslist$lnames), dydx, data = data, model = model, type = type, eps = eps, as.data.frame = as.data.frame, ...) - + out1 <- lapply(setNames(c(varslist$nnames, varslist$lnames), + c(varslist$nnames, varslist$lnames)), + dydx, data = data, model = model, type = type, eps = eps, as.data.frame = as.data.frame, ...) 
+ # add discrete differences for factor terms ## exact number depends on number of factor levels - out2 <- list() + out2 <- setNames(vector(mode = "list", length = length(varslist$fnames)), varslist$fnames) for (i in seq_along(varslist$fnames)) { out2[[i]] <- dydx.factor(data = data, model = model, varslist$fnames[i], type = type, fwrap = FALSE, as.data.frame = as.data.frame, ...) } - - out <- c(out1, out2) + + out <- unname(c(out1, out2)[varslist$all]) if (isTRUE(as.data.frame)) { out <- do.call("cbind.data.frame", out[vapply(out, function(x) length(x) > 0, FUN.VALUE = logical(1))]) } else { diff --git a/R/marginal_effects_glm.R b/R/marginal_effects_glm.R index fcc0167..581d513 100644 --- a/R/marginal_effects_glm.R +++ b/R/marginal_effects_glm.R @@ -1,35 +1,37 @@ #' @rdname marginal_effects #' @importFrom prediction find_data #' @export -marginal_effects.glm <- -function(model, - data = find_data(model, parent.frame()), +marginal_effects.glm <- +function(model, + data = find_data(model, parent.frame()), variables = NULL, - type = c("response", "link"), - eps = 1e-7, + type = c("response", "link"), + eps = 1e-7, varslist = NULL, as.data.frame = TRUE, ...) { - + type <- match.arg(type) - + # identify classes of terms in `model` if (is.null(varslist)) { varslist <- find_terms_in_model(model, variables = variables) } - + # estimate numerical derivatives with respect to each variable (for numeric terms in the model) # add discrete differences for logical terms - out1 <- lapply(c(varslist$nnames, varslist$lnames), dydx, data = data, model = model, type = type, eps = eps, as.data.frame = as.data.frame, ...) - + out1 <- lapply(setNames(c(varslist$nnames, varslist$lnames), + c(varslist$nnames, varslist$lnames)), + dydx, data = data, model = model, type = type, eps = eps, as.data.frame = as.data.frame, ...) + # add discrete differences for factor terms ## exact number depends on number of factor levels - out2 <- list() + out2 <- setNames(vector(mode = "list", length = length(varslist$fnames)), varslist$fnames) for (i in seq_along(varslist$fnames)) { out2[[i]] <- dydx.factor(data = data, model = model, varslist$fnames[i], type = type, fwrap = FALSE, as.data.frame = as.data.frame, ...) } - - out <- c(out1, out2) + + out <- unname(c(out1, out2)[varslist$all]) if (isTRUE(as.data.frame)) { out <- do.call("cbind.data.frame", out[vapply(out, function(x) length(x) > 0, FUN.VALUE = logical(1))]) } else { diff --git a/R/marginal_effects_merMod.R b/R/marginal_effects_merMod.R index 0702290..1eeebbe 100644 --- a/R/marginal_effects_merMod.R +++ b/R/marginal_effects_merMod.R @@ -10,26 +10,28 @@ function(model, as.data.frame = TRUE, varslist = NULL, ...) { - + type <- match.arg(type) - + # identify classes of terms in `model` if (is.null(varslist)) { varslist <- find_terms_in_model(model, variables = variables) } - + # estimate numerical derivatives with respect to each variable (for numeric terms in the model) # add discrete differences for logical terms - out1 <- lapply(c(varslist$nnames, varslist$lnames), dydx, data = data, model = model, type = type, eps = eps, as.data.frame = as.data.frame, ...) - + out1 <- lapply(setNames(c(varslist$nnames, varslist$lnames), + c(varslist$nnames, varslist$lnames)), + dydx, data = data, model = model, type = type, eps = eps, as.data.frame = as.data.frame, ...) 
+ # add discrete differences for factor terms ## exact number depends on number of factor levels - out2 <- list() + out2 <- setNames(vector(mode = "list", length = length(varslist$fnames)), varslist$fnames) for (i in seq_along(varslist$fnames)) { out2[[i]] <- dydx.factor(data = data, model = model, varslist$fnames[i], type = type, fwrap = FALSE, as.data.frame = as.data.frame, ...) } - - out <- c(out1, out2) + + out <- unname(c(out1, out2)[varslist$all]) if (isTRUE(as.data.frame)) { out <- do.call("cbind.data.frame", out[vapply(out, function(x) length(x) > 0, FUN.VALUE = logical(1))]) } else { diff --git a/R/marginal_effects_multinom.R b/R/marginal_effects_multinom.R index 08290f0..6e779a7 100644 --- a/R/marginal_effects_multinom.R +++ b/R/marginal_effects_multinom.R @@ -1,33 +1,35 @@ #' @rdname marginal_effects #' @importFrom prediction find_data #' @export -marginal_effects.multinom <- -function(model, - data = find_data(model, parent.frame()), +marginal_effects.multinom <- +function(model, + data = find_data(model, parent.frame()), variables = NULL, type = NULL, - eps = 1e-7, + eps = 1e-7, varslist = NULL, as.data.frame = TRUE, ...) { - + # identify classes of terms in `model` if (is.null(varslist)) { varslist <- find_terms_in_model(model, variables = variables) } - + # estimate numerical derivatives with respect to each variable (for numeric terms in the model) # add discrete differences for logical terms - out1 <- lapply(c(varslist$nnames, varslist$lnames), dydx, data = data, model = model, type = NULL, eps = eps, as.data.frame = as.data.frame, ...) - + out1 <- lapply(setNames(c(varslist$nnames, varslist$lnames), + c(varslist$nnames, varslist$lnames)), + dydx, data = data, model = model, type = NULL, eps = eps, as.data.frame = as.data.frame, ...) + # add discrete differences for factor terms ## exact number depends on number of factor levels - out2 <- list() + out2 <- setNames(vector(mode = "list", length = length(varslist$fnames)), varslist$fnames) for (i in seq_along(varslist$fnames)) { out2[[i]] <- dydx.factor(data = data, model = model, varslist$fnames[i], fwrap = FALSE, type = NULL, as.data.frame = as.data.frame, ...) } - - out <- c(out1, out2) + + out <- unname(c(out1, out2)[varslist$all]) if (isTRUE(as.data.frame)) { out <- do.call("cbind.data.frame", out[vapply(out, function(x) length(x) > 0, FUN.VALUE = logical(1))]) } else { diff --git a/R/marginal_effects_nnet.R b/R/marginal_effects_nnet.R index 8443acd..819758f 100644 --- a/R/marginal_effects_nnet.R +++ b/R/marginal_effects_nnet.R @@ -1,33 +1,35 @@ #' @rdname marginal_effects #' @importFrom prediction find_data #' @export -marginal_effects.nnet <- -function(model, - data = find_data(model, parent.frame()), +marginal_effects.nnet <- +function(model, + data = find_data(model, parent.frame()), variables = NULL, type = NULL, - eps = 1e-7, + eps = 1e-7, varslist = NULL, as.data.frame = TRUE, ...) { - + # identify classes of terms in `model` if (is.null(varslist)) { varslist <- find_terms_in_model(model, variables = variables) } - + # estimate numerical derivatives with respect to each variable (for numeric terms in the model) # add discrete differences for logical terms - out1 <- lapply(c(varslist$nnames, varslist$lnames), dydx, data = data, model = model, type = NULL, eps = eps, as.data.frame = as.data.frame, ...) - + out1 <- lapply(setNames(c(varslist$nnames, varslist$lnames), + c(varslist$nnames, varslist$lnames)), + dydx, data = data, model = model, type = NULL, eps = eps, as.data.frame = as.data.frame, ...) 
+ # add discrete differences for factor terms ## exact number depends on number of factor levels - out2 <- list() + out2 <- setNames(vector(mode = "list", length = length(varslist$fnames)), varslist$fnames) for (i in seq_along(varslist$fnames)) { out2[[i]] <- dydx.factor(data = data, model = model, varslist$fnames[i], fwrap = FALSE, type = NULL, as.data.frame = as.data.frame, ...) } - - out <- c(out1, out2) + + out <- unname(c(out1, out2)[varslist$all]) if (isTRUE(as.data.frame)) { out <- do.call("cbind.data.frame", out[vapply(out, function(x) length(x) > 0, FUN.VALUE = logical(1))]) } else { diff --git a/R/marginal_effects_polr.R b/R/marginal_effects_polr.R index ac8385a..315a1f3 100644 --- a/R/marginal_effects_polr.R +++ b/R/marginal_effects_polr.R @@ -1,33 +1,35 @@ #' @rdname marginal_effects #' @importFrom prediction find_data #' @export -marginal_effects.polr <- -function(model, - data = find_data(model, parent.frame()), +marginal_effects.polr <- +function(model, + data = find_data(model, parent.frame()), variables = NULL, type = NULL, - eps = 1e-7, + eps = 1e-7, varslist = NULL, as.data.frame = TRUE, ...) { - + # identify classes of terms in `model` if (is.null(varslist)) { varslist <- find_terms_in_model(model, variables = variables) } - + # estimate numerical derivatives with respect to each variable (for numeric terms in the model) # add discrete differences for logical terms - out1 <- lapply(c(varslist$nnames, varslist$lnames), dydx, data = data, model = model, type = NULL, eps = eps, as.data.frame = as.data.frame, ...) - + out1 <- lapply(setNames(c(varslist$nnames, varslist$lnames), + c(varslist$nnames, varslist$lnames)), + dydx, data = data, model = model, type = NULL, eps = eps, as.data.frame = as.data.frame, ...) + # add discrete differences for factor terms ## exact number depends on number of factor levels - out2 <- list() + out2 <- setNames(vector(mode = "list", length = length(varslist$fnames)), varslist$fnames) for (i in seq_along(varslist$fnames)) { out2[[i]] <- dydx.factor(data = data, model = model, varslist$fnames[i], fwrap = FALSE, type = NULL, as.data.frame = as.data.frame, ...) } - - out <- c(out1, out2) + + out <- unname(c(out1, out2)[varslist$all]) if (isTRUE(as.data.frame)) { out <- do.call("cbind.data.frame", out[vapply(out, function(x) length(x) > 0, FUN.VALUE = logical(1))]) } else { diff --git a/R/margins.R b/R/margins.R index 0f92991..26ab4dc 100644 --- a/R/margins.R +++ b/R/margins.R @@ -4,13 +4,12 @@ #' @docType package #' @title Marginal Effects Estimation #' @description This package is an R port of Stata's \samp{margins} command, implemented as an S3 generic \code{margins()} for model objects, like those of class \dQuote{lm} and \dQuote{glm}. \code{margins()} is an S3 generic function for building a \dQuote{margins} object from a model object. Methods are currently implemented for several model classes (see Details, below). -#' +#' #' margins provides \dQuote{marginal effects} summaries of models. Marginal effects are partial derivatives of the regression equation with respect to each variable in the model for each unit in the data; average marginal effects are simply the mean of these unit-specific partial derivatives over some sample. In ordinary least squares regression with no interactions or higher-order term, the estimated slope coefficients are marginal effects. In other cases and for generalized linear models, the coefficients are not marginal effects at least not on the scale of the response variable. 
margins therefore provides ways of calculating the marginal effects of variables to make these models more interpretable. -#' +#' #' The package also provides a low-level function, \code{\link{marginal_effects}}, to estimate those quantities and return a data frame of unit-specific effects and another even lower-level function, \code{\link{dydx}}, to provide variable-specific derivatives from models. Some of the underlying architecture for the package is provided by the low-level function \code{\link[prediction]{prediction}}, which provides a consistent data frame interface to \code{\link[stats]{predict}} for a large number of model types. If a \code{prediction} method exists for a model class, \code{margin} should work for the model class but only those classes listed here have been tested and specifically supported. #' @param model A model object. See Details for supported model classes. #' @param data A data frame containing the data at which to evaluate the marginal effects, as in \code{\link[stats]{predict}}. This is optional, but may be required when the underlying modelling function sets \code{model = FALSE}. -#' @param design Only for models estimated using \code{\link[survey]{svyglm}}, the \dQuote{survey.design} object used to estimate the model. This is required. #' @param variables A character vector with the names of variables for which to compute the marginal effects. The default (\code{NULL}) returns marginal effects for all variables. #' @param at A list of one or more named vectors, specifically values at which to calculate the marginal effects. This is an analogue of Stata's \code{, at()} option. The specified values are fully combined (i.e., a cartesian product) to find AMEs for all combinations of specified variable values. Rather than a list, this can also be a data frame of combination levels if only a subset of combinations are desired. These are used to modify the value of \code{data} when calculating AMEs across specified values (see \code{\link[prediction]{build_datalist}} for details on use). Note: This does not calculate AMEs for \emph{subgroups} but rather for counterfactual datasets where all observaations take the specified values; to obtain subgroup effects, subset \code{data} directly. #' @param type A character string indicating the type of marginal effects to estimate. Mostly relevant for non-linear models, where the reasonable options are \dQuote{response} (the default) or \dQuote{link} (i.e., on the scale of the linear predictor in a GLM). @@ -21,11 +20,11 @@ #' @param eps A numeric value specifying the \dQuote{step} to use when calculating numerical derivatives. #' @param \dots Arguments passed to methods, and onward to \code{\link{dydx}} methods and possibly further to \code{\link[prediction]{prediction}} methods. This can be useful, for example, for setting \code{type} (predicted value type), \code{eps} (precision), or \code{category} (category for multi-category outcome models), etc. #' @details Methods for this generic return a \dQuote{margins} object, which is a data frame consisting of the original data, predicted values and standard errors thereof, estimated marginal effects from the model \code{model} (for all variables used in the model, or the subset specified by \code{variables}), along with attributes describing various features of the marginal effects estimates. -#' +#' #' The default print method is concise; a more useful \code{summary} method provides additional details. 
-#' +#' #' \code{margins_summary} is sugar that provides a more convenient way of obtaining the nested call: \code{summary(margins(...))}. -#' +#' #' Methods are currently implemented for the following object classes: #' \itemize{ #' \item \dQuote{betareg}, see \code{\link[betareg]{betareg}} @@ -40,23 +39,23 @@ #' } #' #' The \code{margins} methods simply construct a list of data frames based upon the values of \code{at} (using \code{\link[prediction]{build_datalist}}), calculate marginal effects for each data frame (via \code{\link{marginal_effects}} and, in turn, \code{\link{dydx}} and \code{\link[prediction]{prediction}}), stacks the results together, and provides variance estimates. Alternatively, you can use \code{\link{marginal_effects}} directly to only retrieve a data frame of marginal effects without constructing a \dQuote{margins} object or variance estimates. That can be efficient for plotting, etc., given the time-consuming nature of variance estimation. -#' +#' #' See \code{\link{dydx}} for details on estimation of marginal effects. -#' +#' #' The choice of \code{vce} may be important. The default variance-covariance estimation procedure (\code{vce = "delta"}) uses the delta method to estimate marginal effect variances. This is the fastest method. When \code{vce = "simulation"}, coefficient estimates are repeatedly drawn from the asymptotic (multivariate normal) distribution of the model coefficients and each draw is used to estimate marginal effects, with the variance based upon the dispersion of those simulated effects. The number of iterations used is given by \code{iterations}. For \code{vce = "bootstrap"}, the bootstrap is used to repeatedly subsample \code{data} and the variance of marginal effects is estimated from the variance of the bootstrap distribution. This method is markedly slower than the other two procedures. Again, \code{iterations} regulates the number of bootstrap subsamples to draw. Some model classes (notably \dQuote{loess}) fix \code{vce ="none"}. #' #' @return A data frame of class \dQuote{margins} containing the contents of \code{data}, predicted values from \code{model} for \code{data}, the standard errors of the predictions, and any estimated marginal effects. If \code{at = NULL} (the default), then the data frame will have a number of rows equal to \code{nrow(data)}. Otherwise, the number of rows will be a multiple thereof based upon the number of combinations of values specified in \code{at}. Columns containing marginal effects are distinguished by their name (prefixed by \code{dydx_}). These columns can be extracted from a \dQuote{margins} object using, for example, \code{marginal_effects(margins(model))}. Columns prefixed by \code{Var_} specify the variances of the \emph{average} marginal effects, whereas (optional) columns prefixed by \code{SE_} contain observation-specific standard errors. A special column, \code{_at_number}, specifies which \code{at} combination a given row corresponds to; the data frame carries an attribute \dQuote{at} that specifies which combination of values this index represents. The \code{summary.margins()} method provides for pretty printing of the results, particularly in cases where \code{at} is specified. A variance-covariance matrix for the average marginal effects is returned as an attribute (though behavior when \code{at} is non-NULL is unspecified). #' @author Thomas J. Leeper #' @references #' Greene, W.H. 2012. Econometric Analysis, 7th Ed. Boston: Pearson. -#' +#' #' Stata manual: \code{margins}. 
Retrieved 2014-12-15 from \url{http://www.stata.com/manuals13/rmargins.pdf}. #' @examples #' # basic example using linear model #' require("datasets") #' x <- lm(mpg ~ cyl * hp + wt, data = head(mtcars)) #' margins(x) -#' +#' #' # obtain unit-specific standard errors #' \dontrun{ #' margins(x, unit_ses = TRUE) @@ -64,27 +63,27 @@ #' #' # use of 'variables' argument to estimate only some MEs #' summary(margins(x, variables = "hp")) -#' +#' #' # use of 'at' argument #' ## modifying original data values #' margins(x, at = list(hp = 150)) #' ## AMEs at various data values #' margins(x, at = list(hp = c(95, 150), cyl = c(4,6))) -#' +#' #' # use of 'data' argument to obtain AMEs for a subset of data #' margins(x, data = mtcars[mtcars[["cyl"]] == 4,]) #' margins(x, data = mtcars[mtcars[["cyl"]] == 6,]) -#' +#' #' # return discrete differences for continuous terms #' ## passes 'change' through '...' to dydx() #' margins(x, change = "sd") -#' +#' #' # summary() method #' summary(margins(x, at = list(hp = c(95, 150)))) #' margins_summary(x, at = list(hp = c(95, 150))) #' ## control row order of summary() output #' summary(margins(x, at = list(hp = c(95, 150))), by_factor = FALSE) -#' +#' #' # alternative 'vce' estimation #' \dontrun{ #' # bootstrap @@ -92,7 +91,7 @@ #' # simulation (ala Clarify/Zelig) #' margins(x, vce = "simulation", iterations = 100L) #' } -#' +#' #' # specifying a custom `vcov` argument #' if (require("sandwich")) { #' x2 <- lm(Sepal.Length ~ Sepal.Width, data = head(iris)) @@ -105,7 +104,7 @@ #' x <- glm(am ~ hp, data = head(mtcars), family = binomial) #' margins(x, type = "response") #' margins(x, type = "link") -#' +#' #' # multi-category outcome #' if (requireNamespace("nnet")) { #' data("iris3", package = "datasets") @@ -121,14 +120,14 @@ #' list_data <- split(mtcars, mtcars$gear) #' list_mod <- lapply(list_data, function(x) lm(mpg ~ cyl + wt, data = x)) #' mapply(margins_summary, model = list_mod, data = list_data, SIMPLIFY = FALSE) -#' +#' #' @seealso \code{\link{marginal_effects}}, \code{\link{dydx}}, \code{\link[prediction]{prediction}} #' @keywords models package #' @import stats #' @importFrom prediction prediction find_data build_datalist #' @importFrom MASS mvrnorm #' @export -margins <- +margins <- function(model, ...) { UseMethod("margins") } diff --git a/R/margins_svyglm.R b/R/margins_svyglm.R index a310482..6d1da20 100644 --- a/R/margins_svyglm.R +++ b/R/margins_svyglm.R @@ -1,11 +1,10 @@ #' @rdname margins #' @export -margins.svyglm <- -function(model, - data = find_data(model, parent.frame()), - design, +margins.svyglm <- +function(model, + data = find_data(model), variables = NULL, - at = NULL, + at = NULL, type = c("response", "link"), vcov = stats::vcov(model), vce = c("delta", "simulation", "bootstrap", "none"), @@ -13,38 +12,39 @@ function(model, unit_ses = FALSE, eps = 1e-7, ...) { - + # require survey requireNamespace("survey") - + # match.arg() type <- match.arg(type) vce <- match.arg(vce) - - # `design` object - if (missing(design)) { - message("Note: Estimating marginal effects without survey weights. 
Specify 'design' to adjust for weighting.")
-        wts <- NULL
-    } else if (!inherits(design, "survey.design")) {
-        stop("'design' must be a 'survey.design' object for models of class 'svyglm'")
+
+    if (inherits(data, c("survey.design", "svyrep.design"))) {
+        wts <- weights(data, type = "sampling")
+        data <- find_data(data)
     } else {
-        wts <- weights(design)
+        # these are not the sampling weights, but they are proportional to the
+        # sampling weights, which works just as well (and is appealingly simple)
+        wts <- weights(model)
+        # to get the actual sampling weights, use:
+        # wts <- weights(model$survey.design, type = "sampling")[-na.action(model)]
     }
-
+
     # setup data
     data_list <- build_datalist(data, at = at)
     if (is.null(names(data_list))) {
         names(data_list) <- NA_character_
     }
     at_specification <- attr(data_list, "at_specification")
-
+
     # identify classes of terms in `model`
     varslist <- find_terms_in_model(model, variables = variables)
-
+
     # reduce memory profile
     model[["model"]] <- NULL
     attr(model[["terms"]], ".Environment") <- NULL
-
+
     # calculate marginal effects
     out <- list()
     for (i in seq_along(data_list)) {
@@ -70,10 +70,10 @@ function(model,
         jac <- NULL
         vc <- NULL
     }
-
+
     # return value
-    structure(do.call("rbind", out),
-              class = c("margins", "data.frame"),
+    structure(do.call("rbind", out),
+              class = c("margins", "data.frame"),
               at = if (is.null(at)) NULL else at_specification,
               type = type,
               call = if ("call" %in% names(model)) model[["call"]] else NULL,
diff --git a/man/margins.Rd b/man/margins.Rd
index e21bb09..4abca68 100644
--- a/man/margins.Rd
+++ b/man/margins.Rd
@@ -171,8 +171,7 @@ margins_summary(model, ..., level = 0.95, by_factor = TRUE)

 \method{margins}{svyglm}(
   model,
-  data = find_data(model, parent.frame()),
-  design,
+  data = find_data(model),
   variables = NULL,
   at = NULL,
   type = c("response", "link"),
@@ -210,8 +209,6 @@ margins_summary(model, ..., level = 0.95, by_factor = TRUE)
 \item{level}{A numeric value specifying the confidence level for calculating p-values and confidence intervals.}

 \item{by_factor}{A logical specifying whether to order the output by factor (the default, \code{TRUE}).}
-
-\item{design}{Only for models estimated using \code{\link[survey]{svyglm}}, the \dQuote{survey.design} object used to estimate the model. This is required.}
 }
 \value{
 A data frame of class \dQuote{margins} containing the contents of \code{data}, predicted values from \code{model} for \code{data}, the standard errors of the predictions, and any estimated marginal effects. If \code{at = NULL} (the default), then the data frame will have a number of rows equal to \code{nrow(data)}. Otherwise, the number of rows will be a multiple thereof based upon the number of combinations of values specified in \code{at}. Columns containing marginal effects are distinguished by their name (prefixed by \code{dydx_}). These columns can be extracted from a \dQuote{margins} object using, for example, \code{marginal_effects(margins(model))}. Columns prefixed by \code{Var_} specify the variances of the \emph{average} marginal effects, whereas (optional) columns prefixed by \code{SE_} contain observation-specific standard errors. A special column, \code{_at_number}, specifies which \code{at} combination a given row corresponds to; the data frame carries an attribute \dQuote{at} that specifies which combination of values this index represents. The \code{summary.margins()} method provides for pretty printing of the results, particularly in cases where \code{at} is specified.
A variance-covariance matrix for the average marginal effects is returned as an attribute (though behavior when \code{at} is non-NULL is unspecified). diff --git a/tests/testthat/tests-core.R b/tests/testthat/tests-core.R index ca7d300..db970fd 100644 --- a/tests/testthat/tests-core.R +++ b/tests/testthat/tests-core.R @@ -40,7 +40,6 @@ test_that("Test build_datalist()", { rm(m) }) - context("Test `at` behavior") test_that("`at` behavior works and warnings/errors occur as expected", { x <- lm(mpg ~ cyl * hp + wt, data = head(mtcars)) @@ -54,20 +53,21 @@ test_that("`at` behavior works and warnings/errors occur as expected", { test_that("factor variables work", { x1 <- lm(mpg ~ factor(cyl), data = head(mtcars)) - expect_true(inherits(marginal_effects(x1), "data.frame"), label = "factors work in formula") + expect_true(inherits(marginal_effects(x1), "data.frame"), label = "factors work in formula") x2 <- lm(Sepal.Length ~ Species, data = iris) expect_true(inherits(marginal_effects(x2), "data.frame"), label = "natural factors work") }) test_that("dydx() works", { - mtcars$am <- as.logical(mtcars$am) - mtcars$cyl <- factor(mtcars$cyl) - x <- lm(mpg ~ wt + am + cyl, data = head(mtcars)) - expect_true(inherits(dydx(head(mtcars), x, "wt"), "data.frame"), label = "dydx dispatch works for numeric") - expect_true(inherits(dydx(head(mtcars), x, "cyl"), "data.frame"), label = "dydx dispatch works for factor") - expect_true(inherits(dydx(head(mtcars), x, "am"), "data.frame"), label = "dydx dispatch works for logical") - expect_true(inherits(marginal_effects(x), "data.frame"), label = "dydx dispatch works via marginal_effects()") - rm(mtcars) + mtcars2 <- mtcars + mtcars2$am <- as.logical(mtcars2$am) + mtcars2$cyl <- factor(mtcars2$cyl) + x <- lm(mpg ~ wt + am + cyl, data = head(mtcars2)) + expect_true(inherits(dydx(head(mtcars2), x, "wt"), "data.frame"), label = "dydx dispatch works for numeric") + expect_true(inherits(dydx(head(mtcars2), x, "cyl"), "data.frame"), label = "dydx dispatch works for factor") + expect_true(inherits(dydx(head(mtcars2), x, "am"), "data.frame"), label = "dydx dispatch works for logical") + expect_true(inherits(marginal_effects(x), "data.frame"), label = "dydx dispatch works via marginal_effects()") + rm(mtcars2) }) test_that("alternative dydx() args", { @@ -77,7 +77,6 @@ test_that("alternative dydx() args", { expect_true(inherits(dydx(head(mtcars), x, "wt", change = "sd"), "data.frame"), label = "dydx w/ change = 'sd'") expect_true(inherits(dydx(head(mtcars), x, "wt", change = range(mtcars[["wt"]], na.rm = TRUE)), "data.frame"), label = "dydx w/ change = c(a,b)") expect_error(dydx(head(mtcars), x, "wt", change = !L), label = "error in dydx w/ change = 1L") - rm(mtcars) }) @@ -108,5 +107,5 @@ test_that("vcov.margins() method words", { m <- margins(x <- lm(mpg ~ wt * hp, data = mtcars)) expect_true(inherits(vcov(m), "matrix"), label = "vcov() method words") expect_true(identical(dim(vcov(m)), c(2L, 2L)), label = "vcov.margins() has correct dimensions") - + }) diff --git a/tests/testthat/tests-edge-cases.R b/tests/testthat/tests-edge-cases.R index e572cb2..8216046 100644 --- a/tests/testthat/tests-edge-cases.R +++ b/tests/testthat/tests-edge-cases.R @@ -28,6 +28,18 @@ test_that("margins() works with missing factor levels", { suppressWarnings(m <- margins(x)) expect_true(inherits(m, "data.frame")) expect_true(nrow(summary(m)) == 4L) + rm(mtcars2) +}) + +test_that("margins() works with `at` for a factor predictor", { + mtcars2 <- mtcars + mtcars2$cyl <- factor(mtcars2$cyl) + x <- 
lm(mpg ~ cyl + gear, data = mtcars2) + m <- margins(x, at = list(cyl = "6")) + expect_true(inherits(m, "data.frame")) + expect_equal(summary(m)$factor, c("cyl4", "cyl8", "gear")) + expect_equivalent(summary(m)$AME, c(6.7470899, -4.2182834, 0.7430041), tolerance = tol) + rm(mtcars2) }) test_that("margins() errors correctly when there are no RHS variables", { diff --git a/tests/testthat/tests-methods.R b/tests/testthat/tests-methods.R index 4183548..35d4ef8 100644 --- a/tests/testthat/tests-methods.R +++ b/tests/testthat/tests-methods.R @@ -33,7 +33,7 @@ if (requireNamespace("betareg")) { context("Test 'lme4' methods") if (requireNamespace("lme4")) { data("ChickWeight", package = "datasets") - + # linear mixed effects models (random intercepts) m <- lme4::lmer(weight ~ Diet + (1|Chick), data = ChickWeight) test_that("Test marginal_effects() for 'lmerMod' (single grouping)", { @@ -46,12 +46,12 @@ if (requireNamespace("lme4")) { }) test_that("Test margins(vce = 'simulation') for 'lmerMod' (single grouping)", { expect_true(inherits(margins(m, vce = "simulation", iterations = 5), - "margins")) + "margins")) }) test_that("Test margins(vce = 'bootstrap') for 'lmerMod' (single grouping)", { expect_true(inherits(suppressWarnings({ margins(m, vce = "bootstrap", iterations = 5) - }), "margins")) + }), "margins")) }) # lmer with multiple random intercepts m <- lme4::lmer(weight ~ Diet + (1|Time) + (1|Chick), data = ChickWeight) @@ -65,14 +65,14 @@ if (requireNamespace("lme4")) { }) test_that("Test margins(vce = 'simulation') for 'lmerMod' (multiple grouping)", { expect_true(inherits(margins(m, vce = "simulation", iterations = 5), - "margins")) + "margins")) }) test_that("Test margins(vce = 'bootstrap') for 'lmerMod' (multiple grouping)", { expect_true(inherits(suppressWarnings({ margins(m, vce = "bootstrap", iterations = 5) - }), "margins")) + }), "margins")) }) - + # lmer with random slopes m <- lme4::lmer(weight ~ Diet + (1+Time|Chick), data = ChickWeight) test_that("Test marginal_effects() for 'lmerMod' (random slopes)", { @@ -85,14 +85,14 @@ if (requireNamespace("lme4")) { }) test_that("Test margins(vce = 'simulation') for 'lmerMod' (random slopes)", { expect_true(inherits(margins(m, vce = "simulation", iterations = 5), - "margins")) + "margins")) }) test_that("Test margins(vce = 'bootstrap') for 'lmerMod' (random slopes)", { expect_true(inherits(suppressWarnings({ margins(m, vce = "bootstrap", iterations = 5) - }), "margins")) + }), "margins")) }) - + # generalized linear mixed effects models ChickWeight$high <- cut(ChickWeight$weight, 2) m <- lme4::glmer(high ~ Diet + (1|Chick), data = ChickWeight, binomial) @@ -106,14 +106,14 @@ if (requireNamespace("lme4")) { }) test_that("Test margins(vce = 'simulation') for 'merMod'", { expect_true(inherits(margins(m, vce = "simulation", iterations = 5), - "margins")) + "margins")) }) test_that("Test margins(vce = 'bootstrap') for 'merMod'", { expect_true(inherits(suppressWarnings({ margins(m, vce = "bootstrap", iterations = 5) - }), "margins")) + }), "margins")) }) - + } context("Test 'MASS' methods") @@ -146,7 +146,7 @@ if (requireNamespace("nnet")) { expect_true(inherits(margins(m), "margins")) expect_true(inherits(margins(m, category = "c"), "margins")) }) - + # "multinom" objects data("housing", package = "MASS") m <- nnet::multinom(Sat ~ Infl + Type + Cont, weights = Freq, data = housing) @@ -179,25 +179,65 @@ if (requireNamespace("ordinal")) { test_that("Test 'svyglm' methods", { if (requireNamespace("survey")) { data("fpc", package = "survey") - svyd 
<- survey::svydesign(weights=~weight, ids=~psuid, strata=~stratid, fpc=~Nh, variables=~x + nh, data=fpc, nest=TRUE) + fpc$nh_f = factor(fpc$nh) + svyd <- survey::svydesign(weights=~weight, ids=~psuid, strata=~stratid, fpc=~Nh, variables=~x + nh + nh_f, data=fpc, nest=TRUE) + svrepd <- survey::as.svrepdesign(svyd) + svrepd <- survey::svrepdesign(survey::make.formula(colnames(svrepd)), weights(svrepd), weights(svrepd, type="sampling"), data=fpc, combined.weights=FALSE) x <- survey::svyglm(x ~ nh, design = svyd) - test_that("Test margins() for 'svyglm' without passing 'design' object", { + xf <- survey::svyglm(x ~ nh_f, design = svyd) + xrep <- survey::svyglm(x ~ nh, design = svrepd) + test_that("Test margins() for 'svyglm' without passing 'data'", { m1 <- margins(x) expect_true(inherits(print(m1), "margins"), label = "print() method for margins from svyglm") expect_true(inherits(summary(m1), "data.frame"), label = "summary() method for margins from svyglm") expect_true(inherits(print(summary(m1)), "data.frame"), label = "print() method for summary.margins from svyglm") }) - test_that("Test margins() for 'svyglm' with 'design' object", { - m2 <- margins(x, design = svyd) + test_that("Test margins() for 'svyglm' with a factor predictor (without passing 'data')", { + m1f <- margins(xf) + expect_true(inherits(print(m1f), "margins"), label = "print() method for margins from svyglm") + expect_true(inherits(summary(m1f), "data.frame"), label = "summary() method for margins from svyglm") + expect_true(inherits(print(summary(m1f)), "data.frame"), label = "print() method for summary.margins from svyglm") + }) + test_that("Test margins() for 'svyglm' estimated on replicate weights design without passing 'data'", { + m1rep <- margins(xrep) + expect_true(inherits(print(m1rep), "margins"), label = "print() method for margins from svyglm") + expect_true(inherits(summary(m1rep), "data.frame"), label = "summary() method for margins from svyglm") + expect_true(inherits(print(summary(m1rep)), "data.frame"), label = "print() method for summary.margins from svyglm") + expect_equal(summary(m1rep)[, c("factor", "AME")], + summary(m1)[, c("factor", "AME")], label = "same point estimates using survey.design and svyrep.design objects") + }) + test_that("Test margins() for 'svyglm' with 'data'", { + m2 <- margins(x, data = svyd) expect_true(inherits(print(m2), "margins"), label = "print() method for margins from svyglm (with 'design')") expect_true(inherits(summary(m2), "data.frame"), label = "summary() method for margins from svyglm (with 'design')") expect_true(inherits(print(summary(m2)), "data.frame"), label = "print() method for summary.margins from svyglm (with 'design')") }) - test_that("Test margins() for 'svyglm' with 'design' object and 'at' specification", { - m3 <- margins(x, at = list(nh = mean(fpc$nh)), design = svyd) + test_that("Test margins() for 'svyglm' with 'data'", { + m2rep <- margins(x, data = svrepd) + expect_true(inherits(print(m2rep), "margins"), label = "print() method for margins from svyglm (with 'design')") + expect_true(inherits(summary(m2rep), "data.frame"), label = "summary() method for margins from svyglm (with 'design')") + expect_true(inherits(print(summary(m2rep)), "data.frame"), label = "print() method for summary.margins from svyglm (with 'design')") + expect_equal(summary(m2rep)[, c("factor", "AME")], + summary(m2)[, c("factor", "AME")], label = "same point estimates using survey.design and svyrep.design objects") + }) + test_that("Test margins() for 'svyglm' with 'data' object 
and 'at' specification", { + m3 <- margins(x, at = list(nh = mean(fpc$nh)), data = svyd) expect_true(inherits(print(m3), "margins"), label = "print() method for margins from svyglm (with 'at')") expect_true(inherits(summary(m3), "data.frame"), label = "summary() method for margins from svyglm (with 'at')") expect_true(inherits(print(summary(m3)), "data.frame"), label = "print() method for summary.margins from svyglm (with 'at')") }) + test_that("Test margins() for 'svyglm' estimated on replicate weights design with 'data' object and 'at' specification", { + m3rep <- margins(xrep, at = list(nh = mean(fpc$nh)), data = svyd) + expect_true(inherits(print(m3rep), "margins"), label = "print() method for margins from svyglm (with 'at')") + expect_true(inherits(summary(m3rep), "data.frame"), label = "summary() method for margins from svyglm (with 'at')") + expect_true(inherits(print(summary(m3rep)), "data.frame"), label = "print() method for summary.margins from svyglm (with 'at')") + }) + test_that("Test margins() for 'svyglm' with 'data' object and 'at' specification for a factor predictor", { + m3f <- margins(xf, at = list(nh_f = levels(fpc$nh_f)[2]), data = svyd) + expect_true(inherits(print(m3f), "margins"), label = "print() method for margins from svyglm (with 'at')") + expect_true(inherits(summary(m3f), "data.frame"), label = "summary() method for margins from svyglm (with 'at')") + expect_true(inherits(print(summary(m3f)), "data.frame"), label = "print() method for summary.margins from svyglm (with 'at')") + }) + rm(fpc, svyd, svrepd, x, xf, xrep) } }) From 7f42111ea9dfeed2b00fe33e4231fcd54b6c4223 Mon Sep 17 00:00:00 2001 From: tzoltak Date: Fri, 9 Oct 2020 22:53:23 +0200 Subject: [PATCH 2/2] mistake in describing author in DESCRIPTION --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index b13fe25..c7186a9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -18,8 +18,8 @@ Authors@R: c(person("Thomas J.", "Leeper", person("Jacob A.", "Long", role = c("ctb"), email = "long.1377@osu.edu", - comment = c(ORCID = "0000-0002-1582-6214"), - person("Tomasz", ""\u017b\u00F3\u0142tak"", + comment = c(ORCID = "0000-0002-1582-6214")), + person("Tomasz", "\u017b\u00F3\u0142tak", role = c("ctb"), email = "tomek@zozlak.org", comment = c(ORCID = "0000-0003-1354-4472"))