diff --git a/DESCRIPTION b/DESCRIPTION index c2c339a..3501ecd 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -26,7 +26,7 @@ License: MIT + file LICENSE Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 Depends: R (>= 2.10), fixest (>= 0.10.1) diff --git a/NAMESPACE b/NAMESPACE index 7c9e761..8f2bdbf 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,6 +6,7 @@ export(gen_data) export(get_honestdid_obj_did2s) export(honest_did_did2s) export(plot_event_study) +export(sparse_model_matrix) import(data.table) import(fixest) importFrom(Matrix,sparseMatrix) diff --git a/R/utils.R b/R/utils.R index 61944b8..631fe20 100755 --- a/R/utils.R +++ b/R/utils.R @@ -1,113 +1,3 @@ -# Copied directly from https://github.com/lrberge/fixest/pull/418/commits/869aeee1ac030fe787a217fbf9f358b7f0eef8f4 -#' Create, or interact variables with, factors -#' -#' Treat a variable as a factor, or interacts a variable with a factor. Values to be dropped/kept from the factor can be easily set. Note that to interact fixed-effects, this function should not be used: instead use directly the syntax `fe1^fe2`. -#' -#' -#' @inheritParams bin -#' -#' @param factor_var A vector (of any type) that will be treated as a factor. You can set references (i.e. exclude values for which to create dummies) with the `ref` argument. -#' @param var A variable of the same length as `factor_var`. This variable will be interacted with the factor in `factor_var`. It can be numeric or factor-like. To force a numeric variable to be treated as a factor, you can add the `i.` prefix to a variable name. For instance take a numeric variable `x_num`: `i(x_fact, x_num)` will treat `x_num` as numeric while `i(x_fact, i.x_num)` will treat `x_num` as a factor (it's a shortcut to `as.factor(x_num)`). -#' @param ref A vector of values to be taken as references from `factor_var`. Can also be a logical: if `TRUE`, then the first value of `factor_var` will be removed. If `ref` is a character vector, partial matching is applied to values; use "@" as the first character to enable regular expression matching. See examples. -#' @param keep A vector of values to be kept from `factor_var` (all others are dropped). By default they should be values from `factor_var` and if `keep` is a character vector partial matching is applied. Use "@" as the first character to enable regular expression matching instead. -#' @param ref2 A vector of values to be dropped from `var`. By default they should be values from `var` and if `ref2` is a character vector partial matching is applied. Use "@" as the first character to enable regular expression matching instead. -#' @param keep2 A vector of values to be kept from `var` (all others are dropped). By default they should be values from `var` and if `keep2` is a character vector partial matching is applied. Use "@" as the first character to enable regular expression matching instead. -#' @param bin2 A list or vector defining the binning of the second variable. See help for the argument `bin` for details (or look at the help of the function [`bin`]). You can use `.()` for `list()`. -#' @param ... Not currently used. -#' -#' @details -#' To interact fixed-effects, this function should not be used: instead use directly the syntax `fe1^fe2` in the fixed-effects part of the formula. Please see the details and examples in the help page of [`feols`]. -#' -#' @return -#' It returns a matrix with number of rows the length of `factor_var`. If there is no interacted variable or it is interacted with a numeric variable, the number of columns is equal to the number of cases contained in `factor_var` minus the reference(s). If the interacted variable is a factor, the number of columns is the number of combined cases between `factor_var` and `var`. -#' -#' @author -#' Laurent Berge -#' -#' @seealso -#' [`iplot`][fixest::coefplot] to plot interactions or factors created with `i()`, [`feols`] for OLS estimation with multiple fixed-effects. -#' -#' See the function [`bin`] for binning variables. -#' -#' @examples -#' -#' # -#' # Simple illustration -#' # -#' -#' x = rep(letters[1:4], 3)[1:10] -#' y = rep(1:4, c(1, 2, 3, 4)) -#' -#' # interaction -#' data.frame(x, y, i(x, y, ref = TRUE)) -#' -#' # without interaction -#' data.frame(x, i(x, "b")) -#' -#' # you can interact factors too -#' z = rep(c("e", "f", "g"), c(5, 3, 2)) -#' data.frame(x, z, i(x, z)) -#' -#' # to force a numeric variable to be treated as a factor: use i. -#' data.frame(x, y, i(x, i.y)) -#' -#' # Binning -#' data.frame(x, i(x, bin = list(ab = c("a", "b")))) -#' -#' # Same as before but using .() for list() and a regular expression -#' # note that to trigger a regex, you need to use an @ first -#' data.frame(x, i(x, bin = .(ab = "@a|b"))) -#' -#' # -#' # In fixest estimations -#' # -#' -#' data(base_did) -#' # We interact the variable 'period' with the variable 'treat' -#' est_did = feols(y ~ x1 + i(period, treat, 5) | id + period, base_did) -#' -#' # => plot only interactions with iplot -#' iplot(est_did) -#' -#' # Using i() for factors -#' est_bis = feols(y ~ x1 + i(period, keep = 3:6) + i(period, treat, 5) | id, base_did) -#' -#' # we plot the second set of variables created with i() -#' # => we need to use keep (otherwise only the first one is represented) -#' coefplot(est_bis, keep = "trea") -#' -#' # => special treatment in etable -#' etable(est_bis, dict = c("6" = "six")) -#' -#' # -#' # Interact two factors -#' # -#' -#' # We use the i. prefix to consider week as a factor -#' data(airquality) -#' aq = airquality -#' aq$week = aq$Day %/% 7 + 1 -#' -#' # Interacting Month and week: -#' res_2F = feols(Ozone ~ Solar.R + i(Month, i.week), aq) -#' -#' # Same but dropping the 5th Month and 1st week -#' res_2F_bis = feols(Ozone ~ Solar.R + i(Month, i.week, ref = 5, ref2 = 1), aq) -#' -#' etable(res_2F, res_2F_bis) -#' -#' # -#' # Binning -#' # -#' -#' data(airquality) -#' -#' feols(Ozone ~ i(Month, bin = "bin::2"), airquality) -#' -#' feols(Ozone ~ i(Month, bin = list(summer = 7:9)), airquality) -#' -#' -#' i = function(factor_var, var, ref, keep, bin, ref2, keep2, bin2, ...){ # Used to create interactions @@ -140,7 +30,7 @@ i = function(factor_var, var, ref, keep, bin, ref2, keep2, bin2, ...){ var_name = fixest:::deparse_long(mc$f2) var = dots$f2 - check_value(var, "vector", .arg_name = "f2") + dreamerr::check_value(var, "vector", .arg_name = "f2") } # General information on the factor variable @@ -160,10 +50,10 @@ i = function(factor_var, var, ref, keep, bin, ref2, keep2, bin2, ...){ if(grepl("^i\\.", var_name)){ var_name = gsub("^i\\.", "", var_name) var = str2lang(var_name) - check_value_plus(var, "evalset vector", .data = parent.frame(), .arg_name = "var") + dreamerr::check_value_plus(var, "evalset vector", .data = parent.frame(), .arg_name = "var") IS_INTER_FACTOR = TRUE } else { - check_value(var, "vector", .arg_name = "var") + dreamerr::check_value(var, "vector", .arg_name = "var") } } else { # f2 was used @@ -220,18 +110,18 @@ i = function(factor_var, var, ref, keep, bin, ref2, keep2, bin2, ...){ } if(!missing(bin)){ - bin = error_sender(eval_dot(bin), arg_name = "bin") + bin = fixest:::error_sender(fixest:::eval_dot(bin), arg_name = "bin") if(!is.null(bin)){ - f = bin_factor(bin, f, f_name) + f = fixest:::bin_factor(bin, f, f_name) } } if(IS_INTER_FACTOR && !fixest:::missnull(bin2)){ - bin2 = error_sender(eval_dot(bin2), arg_name = "bin2") + bin2 = fixest:::error_sender(fixest:::eval_dot(bin2), arg_name = "bin2") if(!is.null(bin2)){ - var = bin_factor(bin2, var, f_name) + var = fixest:::bin_factor(bin2, var, f_name) } } @@ -373,7 +263,7 @@ i = function(factor_var, var, ref, keep, bin, ref2, keep2, bin2, ...){ return(res) } - res = cpp_factor_matrix(fe_num, is_na_all, who_is_dropped, var, col_names) + res = fixest:::cpp_factor_matrix(fe_num, is_na_all, who_is_dropped, var, col_names) # res => matrix with... # - NAs where appropriate # - appropriate number of columns @@ -452,8 +342,7 @@ i_noref = function(factor_var, var, ref, bin, keep, ref2, keep2, bin2){ #' #' This function creates the left-hand-side or the right-hand-side(s) of a [`femlm`], [`feols`] or [`feglm`] estimation. Note that this function currently does not accept a formula #' -#' @inheritParams nobs.fixest -#' +#' @param object A `fixest` estimation object #' @param data If missing (default) then the original data is obtained by evaluating the `call`. Otherwise, it should be a `data.frame`. #' @param type Character vector or one sided formula, default is "rhs". Contains the type of matrix/data.frame to be returned. Possible values are: "lhs", "rhs", "fixef", "iv.rhs1" (1st stage RHS), "iv.rhs2" (2nd stage RHS), "iv.endo" (endogenous vars.), "iv.exo" (exogenous vars), "iv.inst" (instruments). #' @param na.rm Default is `TRUE`. Should observations with NAs be removed from the matrix? @@ -507,7 +396,7 @@ sparse_model_matrix = function(object, data, type = "rhs", na.rm = TRUE, collin } if (any(grepl("^iv", type)) && !isTRUE(object$iv)) { - stop("The type", enumerate_items(grep("^iv", type, value = TRUE), "s.is"), " only valid for IV estimations.") + stop("The type", dreamerr::enumerate_items(grep("^iv", type, value = TRUE), "s.is"), " only valid for IV estimations.") } dreamerr::check_arg(subset, "logical scalar | character vector no na") @@ -540,7 +429,7 @@ sparse_model_matrix = function(object, data, type = "rhs", na.rm = TRUE, collin isFormula = FALSE split_fml = NULL if ("formula" %in% class(object)) { - split_fml = fml_split_internal(object) + split_fml = fixest:::fml_split_internal(object) if (collin.rm == TRUE | length(split_fml) == 3) { message("Formula passed to sparse_model_matrix with collin.rm == TRUE or iv. Estimating feols with formula.") @@ -570,7 +459,7 @@ sparse_model_matrix = function(object, data, type = "rhs", na.rm = TRUE, collin fake_intercept = !is.null(split_fml[[2]]) | fml_0 } else { - fml_linear = formula(object, type = "linear") + fml_linear = stats::formula(object, type = "linear") # we kick out the intercept if there is presence of fixed-effects fml_0 = attr(stats::terms(fml_linear), "intercept") == 0 @@ -596,10 +485,10 @@ sparse_model_matrix = function(object, data, type = "rhs", na.rm = TRUE, collin if (isFormula && (length(split_fml) == 3)) { fml_iv = split_fml[[3]] - fml = .xpd(..lhs ~ ..endo + ..rhs, ..lhs = fml[[2]], ..endo = fml_iv[[2]], ..rhs = fml[[3]]) + fml = fixest:::.xpd(..lhs ~ ..endo + ..rhs, ..lhs = fml[[2]], ..endo = fml_iv[[2]], ..rhs = fml[[3]]) } else if (isTRUE(object$iv)) { fml_iv = object$fml_all$iv - fml = .xpd(..lhs ~ ..endo + ..rhs, ..lhs = fml[[2]], ..endo = fml_iv[[2]], ..rhs = fml[[3]]) + fml = fixest:::.xpd(..lhs ~ ..endo + ..rhs, ..lhs = fml[[2]], ..endo = fml_iv[[2]], ..rhs = fml[[3]]) } vars = attr(stats::terms(fml), "term.labels") @@ -618,7 +507,7 @@ sparse_model_matrix = function(object, data, type = "rhs", na.rm = TRUE, collin } else { if (isFormula) { - fixef_fml = .xpd(~ ..fe, ..fe = split_fml[[2]]) + fixef_fml = fixest:::.xpd(~ ..fe, ..fe = split_fml[[2]]) } else { fixef_fml = object$fml_all$fixef } @@ -790,7 +679,7 @@ sparse_model_matrix = function(object, data, type = "rhs", na.rm = TRUE, collin fml = object$fml if (object$iv_stage == 2) { fml_iv = object$fml_all$iv - fml = .xpd(..lhs ~ ..inst + ..rhs, ..lhs = fml[[2]], ..inst = fml_iv[[3]], ..rhs = fml[[3]]) + fml = fixest:::.xpd(..lhs ~ ..inst + ..rhs, ..lhs = fml[[2]], ..inst = fml_iv[[3]], ..rhs = fml[[3]]) } vars = attr(stats::terms(fml), "term.labels") iv_rhs1 = vars_to_sparse_mat(vars = vars, data = data, object = object, collin.rm = collin.rm, type = "iv.rhs1", add_intercept = !fake_intercept) @@ -810,12 +699,12 @@ sparse_model_matrix = function(object, data, type = "rhs", na.rm = TRUE, collin fit_vars = c() for (i in seq_along(stage_1)) { fit_vars[i] = v = paste0("fit_", names(stage_1)[i]) - data[[v]] = predict(stage_1[[i]], newdata = data, sample = "original") + data[[v]] = stats::predict(stage_1[[i]], newdata = data, sample = "original") } # II) we create the variables fml = object$fml - fml = .xpd(..lhs ~ ..fit + ..rhs, ..lhs = fml[[2]], ..fit = fit_vars, ..rhs = fml[[3]]) + fml = fixest:::.xpd(..lhs ~ ..fit + ..rhs, ..lhs = fml[[2]], ..fit = fit_vars, ..rhs = fml[[3]]) vars = attr(stats::terms(fml), "term.labels") iv_rhs2 = vars_to_sparse_mat(vars = vars, data = data, object = object, collin.rm = collin.rm, type = "iv.rhs2", add_intercept = !fake_intercept) @@ -1055,7 +944,7 @@ vars_to_sparse_mat = function(vars, data, collin.rm = FALSE, object = NULL, type } # TODO: Should I use error_sender? - # mat = error_sender(fixest_model_matrix_extra( + # mat = fixest:::error_sender(fixest_model_matrix_extra( # object = object, newdata = data, original_data = original_data, # fml = fml, fake_intercept = fake_intercept, # subset = subset), @@ -1079,7 +968,7 @@ vars_to_sparse_mat = function(vars, data, collin.rm = FALSE, object = NULL, type endo = fml_iv[[2]] # Trick to get the rhs variables as a character vector - endo = .xpd(~ ..endo, ..endo = endo) + endo = fixest:::.xpd(~ ..endo, ..endo = endo) endo = attr(stats::terms(endo), "term.labels") exo = lapply(object$iv_first_stage, function(x) names(stats::coef(x))) diff --git a/README.Rmd b/README.Rmd index a18c7c7..58f26d8 100755 --- a/README.Rmd +++ b/README.Rmd @@ -10,7 +10,8 @@ knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.path = "man/figures/README-", - out.width = "100%" + out.width = "100%", + dpi = 300 ) ``` @@ -77,7 +78,7 @@ df_het = as.data.frame(df_het) Here is a plot of the average outcome variable for each of the groups: -```{r plot-df-het, fig.width=8, fig.height=4, fig.cap="Example data with heterogeneous treatment effects"} +```{r plot-df-het, fig.width=8, fig.height=4, dpi=300, fig.cap="Example data with heterogeneous treatment effects"} # Mean for treatment group-year agg <- aggregate(df_het$dep_var, by = list(g = df_het$g, year = df_het$year), FUN = mean) @@ -136,7 +137,7 @@ es <- did2s(df_het, And plot the results: -```{r plot-es, fig.cap="Event-study plot with example data"} +```{r plot-es, fig.cap="Event-study plot with example data", fig.width=8, fig.height=5, dpi=300} fixest::iplot(es, main = "Event study: Staggered treatment", xlab = "Relative time to treatment", col = "steelblue", ref.line = -0.5) # Add the (mean) true effects @@ -153,7 +154,7 @@ legend( ### Comparison to TWFE -```{r plot-compare, ig.cap="TWFE and Two-Stage estimates of Event-Study"} +```{r plot-compare, fig.cap="TWFE and Two-Stage estimates of Event-Study", fig.width=8, fig.height=5, dpi=300} twfe <- feols(dep_var ~ i(rel_year, ref = c(-1, Inf)) | unit + year, data = df_het) fixest::iplot(list(es, twfe), @@ -178,7 +179,7 @@ In version 1.1.0, we added support for computing a sensitivity analysis using th Here's an example using data from [here](https://github.com/Mixtape-Sessions/Advanced-DID/tree/main/Exercises/Exercise-1). The provided dataset `ehec_data.dta` contains a state-level panel dataset on health insurance coverage and Medicaid expansion. The variable `dins` shows the share of low-income childless adults with health insurance in the state. The variable `yexp2` gives the year that a state expanded Medicaid coverage under the Affordable Care Act, and is missing if the state never expanded. -```{r ehec-data-est, fig.cap="Estimates of the effect of Medicaid expansion on health insurance coverage"} +```{r ehec-data-est, fig.cap="Estimates of the effect of Medicaid expansion on health insurance coverage", fig.width=8, fig.height=5, dpi=300} library(HonestDiD) library(ggplot2) @@ -200,7 +201,7 @@ es_did2s <- did2s( coefplot(es_did2s) ``` -```{r sensitivity, fig.cap="Sensitivity analysis for the example data"} +```{r sensitivity, fig.cap="Sensitivity analysis for the example data", fig.width=8, fig.height=5, dpi=300} # Relative Magnitude sensitivity analysis sensitivity_results <- es_did2s |> # Take fixest obj and convert for `honest_did_did2s` @@ -222,7 +223,7 @@ HonestDiD::createSensitivityPlot_relativeMagnitudes( ## Event-study plot -```{r} +```{r, fig.cap="Multiple event-study estimators", fig.width=12, fig.height=8.5, dpi=300} library(tidyverse) data(df_het) df = df_het diff --git a/README.md b/README.md index 7457d25..6254526 100755 --- a/README.md +++ b/README.md @@ -229,7 +229,14 @@ legend( ) ``` - +
+ +TWFE and Two-Stage estimates of Event-Study +

+TWFE and Two-Stage estimates of Event-Study +

+ +
### Honest DID @@ -354,7 +361,14 @@ multiple_ests = did2s::event_study( did2s::plot_event_study(multiple_ests) ``` - +
+ +Multiple event-study estimators +

+Multiple event-study estimators +

+ +
# Citation diff --git a/man/figures/README-ehec-data-est-1.png b/man/figures/README-ehec-data-est-1.png index 551252b..8ad837a 100644 Binary files a/man/figures/README-ehec-data-est-1.png and b/man/figures/README-ehec-data-est-1.png differ diff --git a/man/figures/README-plot-compare-1.png b/man/figures/README-plot-compare-1.png index 335fe5a..cd52b94 100644 Binary files a/man/figures/README-plot-compare-1.png and b/man/figures/README-plot-compare-1.png differ diff --git a/man/figures/README-plot-df-het-1.png b/man/figures/README-plot-df-het-1.png index 53ba3e5..5f48aa8 100644 Binary files a/man/figures/README-plot-df-het-1.png and b/man/figures/README-plot-df-het-1.png differ diff --git a/man/figures/README-plot-es-1.png b/man/figures/README-plot-es-1.png index 2e560d0..8807641 100644 Binary files a/man/figures/README-plot-es-1.png and b/man/figures/README-plot-es-1.png differ diff --git a/man/figures/README-sensitivity-1.png b/man/figures/README-sensitivity-1.png index c17acac..3ed48ad 100644 Binary files a/man/figures/README-sensitivity-1.png and b/man/figures/README-sensitivity-1.png differ diff --git a/man/figures/README-unnamed-chunk-3-1.png b/man/figures/README-unnamed-chunk-3-1.png index faa503d..8a8f36c 100644 Binary files a/man/figures/README-unnamed-chunk-3-1.png and b/man/figures/README-unnamed-chunk-3-1.png differ diff --git a/man/sparse_model_matrix.Rd b/man/sparse_model_matrix.Rd new file mode 100644 index 0000000..9515b25 --- /dev/null +++ b/man/sparse_model_matrix.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{sparse_model_matrix} +\alias{sparse_model_matrix} +\title{Design matrix of a \code{fixest} object returned in sparse format} +\usage{ +sparse_model_matrix( + object, + data, + type = "rhs", + na.rm = TRUE, + collin.rm = TRUE, + combine = TRUE, + ... +) +} +\arguments{ +\item{object}{A \code{fixest} estimation object} + +\item{data}{If missing (default) then the original data is obtained by evaluating the \code{call}. Otherwise, it should be a \code{data.frame}.} + +\item{type}{Character vector or one sided formula, default is "rhs". Contains the type of matrix/data.frame to be returned. Possible values are: "lhs", "rhs", "fixef", "iv.rhs1" (1st stage RHS), "iv.rhs2" (2nd stage RHS), "iv.endo" (endogenous vars.), "iv.exo" (exogenous vars), "iv.inst" (instruments).} + +\item{na.rm}{Default is \code{TRUE}. Should observations with NAs be removed from the matrix?} + +\item{collin.rm}{Logical scalar, default is \code{TRUE}. Whether to remove variables that were found to be collinear during the estimation. Beware: it does not perform a collinearity check.} + +\item{combine}{Logical scalar, default is \code{TRUE}. Whether to combine each resulting sparse matrix} + +\item{...}{Not currently used.} +} +\value{ +It returns either a single sparse matrix a list of matrices, depending whether \code{combine} is \code{TRUE} or \code{FALSE}. The sparse matrix is of class \code{dgCMatrix} from the \code{Matrix} package. +} +\description{ +This function creates the left-hand-side or the right-hand-side(s) of a \code{\link{femlm}}, \code{\link{feols}} or \code{\link{feglm}} estimation. Note that this function currently does not accept a formula +} +\examples{ + +est = feols(wt ~ i(vs) + hp | cyl, mtcars) +sparse_model_matrix(est) + + +} +\seealso{ +See also the main estimation functions \code{\link{femlm}}, \code{\link{feols}} or \code{\link{feglm}}. \code{\link{formula.fixest}}, \code{\link{update.fixest}}, \code{\link{summary.fixest}}, \code{\link{vcov.fixest}}. +} +\author{ +Laurent Berge, Kyle Butts +} diff --git a/tests/testthat/test-did2s.R b/tests/testthat/test-did2s.R index e74e60b..f152356 100755 --- a/tests/testthat/test-did2s.R +++ b/tests/testthat/test-did2s.R @@ -8,7 +8,8 @@ library(haven) castle = haven::read_dta("https://github.com/scunning1975/mixtape/raw/master/castle.dta") # Add random 0/1 variable -df_hom[, temp := as.numeric(runif(.N) > 0.5)] + +df_hom$temp = as.numeric(runif(nrow(df_hom)) > 0.5) test_that("estimation runs", { # Static