Add event study example to README

kylebutts · Feb 7, 2024 · 22c3c46 · 22c3c46
1 parent e86f3ef
commit 22c3c46
Show file tree

Hide file tree

Showing 13 changed files with 98 additions and 142 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -26,7 +26,7 @@ License: MIT + file LICENSE
 Encoding: UTF-8
 LazyData: true
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.1
 Depends: 
     R (>= 2.10),
     fixest (>= 0.10.1)

diff --git a/NAMESPACE b/NAMESPACE
@@ -6,6 +6,7 @@ export(gen_data)
 export(get_honestdid_obj_did2s)
 export(honest_did_did2s)
 export(plot_event_study)
+export(sparse_model_matrix)
 import(data.table)
 import(fixest)
 importFrom(Matrix,sparseMatrix)

diff --git a/R/utils.R b/R/utils.R
@@ -1,113 +1,3 @@
-# Copied directly from https://github.com/lrberge/fixest/pull/418/commits/869aeee1ac030fe787a217fbf9f358b7f0eef8f4
-#' Create, or interact variables with, factors
-#'
-#' Treat a variable as a factor, or interacts a variable with a factor. Values to be dropped/kept from the factor can be easily set. Note that to interact fixed-effects, this function should not be used: instead use directly the syntax `fe1^fe2`.
-#'
-#'
-#' @inheritParams bin
-#'
-#' @param factor_var  A vector (of any type) that will be treated as a factor. You can set references (i.e. exclude values for which to create dummies) with the `ref` argument.
-#' @param var A variable of the same length as `factor_var`. This variable will be interacted with the factor in `factor_var`. It can be numeric or factor-like. To force a numeric variable to be treated as a factor, you can add the `i.` prefix to a variable name. For instance take a numeric variable `x_num`: `i(x_fact, x_num)` will treat `x_num` as numeric while `i(x_fact, i.x_num)` will treat `x_num` as a factor (it's a shortcut to `as.factor(x_num)`).
-#' @param ref A vector of values to be taken as references from `factor_var`. Can also be a logical: if `TRUE`, then the first value of `factor_var` will be removed. If `ref` is a character vector, partial matching is applied to values; use "@" as the first character to enable regular expression matching. See examples.
-#' @param keep A vector of values to be kept from `factor_var` (all others are dropped). By default they should be values from `factor_var` and if `keep` is a character vector partial matching is applied. Use "@" as the first character to enable regular expression matching instead.
-#' @param ref2 A vector of values to be dropped from `var`. By default they should be values from `var` and if `ref2` is a character vector partial matching is applied. Use "@" as the first character to enable regular expression matching instead.
-#' @param keep2 A vector of values to be kept from `var` (all others are dropped). By default they should be values from `var` and if `keep2` is a character vector partial matching is applied. Use "@" as the first character to enable regular expression matching instead.
-#' @param bin2 A list or vector defining the binning of the second variable. See help for the argument `bin` for details (or look at the help of the function [`bin`]). You can use `.()` for `list()`.
-#' @param ... Not currently used.
-#'
-#' @details
-#' To interact fixed-effects, this function should not be used: instead use directly the syntax `fe1^fe2` in the fixed-effects part of the formula. Please see the details and examples in the help page of [`feols`].
-#'
-#' @return
-#' It returns a matrix with number of rows the length of `factor_var`. If there is no interacted variable or it is interacted with a numeric variable, the number of columns is equal to the number of cases contained in `factor_var` minus the reference(s). If the interacted variable is a factor, the number of columns is the number of combined cases between `factor_var` and `var`.
-#'
-#' @author
-#' Laurent Berge
-#'
-#' @seealso
-#' [`iplot`][fixest::coefplot] to plot interactions or factors created with `i()`, [`feols`] for OLS estimation with multiple fixed-effects.
-#'
-#' See the function [`bin`] for binning variables.
-#'
-#' @examples
-#'
-#' #
-#' # Simple illustration
-#' #
-#'
-#' x = rep(letters[1:4], 3)[1:10]
-#' y = rep(1:4, c(1, 2, 3, 4))
-#'
-#' # interaction
-#' data.frame(x, y, i(x, y, ref = TRUE))
-#'
-#' # without interaction
-#' data.frame(x, i(x, "b"))
-#'
-#' # you can interact factors too
-#' z = rep(c("e", "f", "g"), c(5, 3, 2))
-#' data.frame(x, z, i(x, z))
-#'
-#' # to force a numeric variable to be treated as a factor: use i.
-#' data.frame(x, y, i(x, i.y))
-#'
-#' # Binning
-#' data.frame(x, i(x, bin = list(ab = c("a", "b"))))
-#'
-#' # Same as before but using .() for list() and a regular expression
-#' # note that to trigger a regex, you need to use an @ first
-#' data.frame(x, i(x, bin = .(ab = "@a|b")))
-#'
-#' #
-#' # In fixest estimations
-#' #
-#'
-#' data(base_did)
-#' # We interact the variable 'period' with the variable 'treat'
-#' est_did = feols(y ~ x1 + i(period, treat, 5) | id + period, base_did)
-#'
-#' # => plot only interactions with iplot
-#' iplot(est_did)
-#'
-#' # Using i() for factors
-#' est_bis = feols(y ~ x1 + i(period, keep = 3:6) + i(period, treat, 5) | id, base_did)
-#'
-#' # we plot the second set of variables created with i()
-#' # => we need to use keep (otherwise only the first one is represented)
-#' coefplot(est_bis, keep = "trea")
-#'
-#' # => special treatment in etable
-#' etable(est_bis, dict = c("6" = "six"))
-#'
-#' #
-#' # Interact two factors
-#' #
-#'
-#' # We use the i. prefix to consider week as a factor
-#' data(airquality)
-#' aq = airquality
-#' aq$week = aq$Day %/% 7 + 1
-#'
-#' # Interacting Month and week:
-#' res_2F = feols(Ozone ~ Solar.R + i(Month, i.week), aq)
-#'
-#' # Same but dropping the 5th Month and 1st week
-#' res_2F_bis = feols(Ozone ~ Solar.R + i(Month, i.week, ref = 5, ref2 = 1), aq)
-#'
-#' etable(res_2F, res_2F_bis)
-#'
-#' #
-#' # Binning
-#' #
-#'
-#' data(airquality)
-#'
-#' feols(Ozone ~ i(Month, bin = "bin::2"), airquality)
-#'
-#' feols(Ozone ~ i(Month, bin = list(summer = 7:9)), airquality)
-#'
-#'
-#'
 i = function(factor_var, var, ref, keep, bin, ref2, keep2, bin2, ...){
     # Used to create interactions
 
@@ -140,7 +30,7 @@ i = function(factor_var, var, ref, keep, bin, ref2, keep2, bin2, ...){
 
         var_name = fixest:::deparse_long(mc$f2)
         var = dots$f2
-        check_value(var, "vector", .arg_name = "f2")
+        dreamerr::check_value(var, "vector", .arg_name = "f2")
     }
 
     # General information on the factor variable
@@ -160,10 +50,10 @@ i = function(factor_var, var, ref, keep, bin, ref2, keep2, bin2, ...){
             if(grepl("^i\\.", var_name)){
                 var_name = gsub("^i\\.", "", var_name)
                 var = str2lang(var_name)
-                check_value_plus(var, "evalset vector", .data = parent.frame(), .arg_name = "var")
+                dreamerr::check_value_plus(var, "evalset vector", .data = parent.frame(), .arg_name = "var")
                 IS_INTER_FACTOR = TRUE
             } else {
-                check_value(var, "vector", .arg_name = "var")
+                dreamerr::check_value(var, "vector", .arg_name = "var")
             }
         } else {
             # f2 was used
@@ -220,18 +110,18 @@ i = function(factor_var, var, ref, keep, bin, ref2, keep2, bin2, ...){
     }
 
     if(!missing(bin)){
-        bin = error_sender(eval_dot(bin), arg_name = "bin")
+        bin = fixest:::error_sender(fixest:::eval_dot(bin), arg_name = "bin")
 
         if(!is.null(bin)){
-            f = bin_factor(bin, f, f_name)
+            f = fixest:::bin_factor(bin, f, f_name)
         }
     }
 
     if(IS_INTER_FACTOR && !fixest:::missnull(bin2)){
-        bin2 = error_sender(eval_dot(bin2), arg_name = "bin2")
+        bin2 = fixest:::error_sender(fixest:::eval_dot(bin2), arg_name = "bin2")
 
         if(!is.null(bin2)){
-            var = bin_factor(bin2, var, f_name)
+            var = fixest:::bin_factor(bin2, var, f_name)
         }
     }
 
@@ -373,7 +263,7 @@ i = function(factor_var, var, ref, keep, bin, ref2, keep2, bin2, ...){
         return(res)
     }
 
-    res = cpp_factor_matrix(fe_num, is_na_all, who_is_dropped, var, col_names)
+    res = fixest:::cpp_factor_matrix(fe_num, is_na_all, who_is_dropped, var, col_names)
     # res => matrix with...
     #  - NAs where appropriate
     #  - appropriate number of columns
@@ -452,8 +342,7 @@ i_noref = function(factor_var, var, ref, bin, keep, ref2, keep2, bin2){
 #'
 #' This function creates the left-hand-side or the right-hand-side(s) of a [`femlm`], [`feols`] or [`feglm`] estimation. Note that this function currently does not accept a formula
 #'
-#' @inheritParams nobs.fixest
-#'
+#' @param object A `fixest` estimation object
 #' @param data If missing (default) then the original data is obtained by evaluating the `call`. Otherwise, it should be a `data.frame`.
 #' @param type Character vector or one sided formula, default is "rhs". Contains the type of matrix/data.frame to be returned. Possible values are: "lhs", "rhs", "fixef", "iv.rhs1" (1st stage RHS), "iv.rhs2" (2nd stage RHS), "iv.endo" (endogenous vars.), "iv.exo" (exogenous vars), "iv.inst" (instruments).
 #' @param na.rm Default is `TRUE`. Should observations with NAs be removed from the matrix?
@@ -507,7 +396,7 @@ sparse_model_matrix = function(object, data, type = "rhs", na.rm = TRUE,  collin
     }
 
     if (any(grepl("^iv", type)) && !isTRUE(object$iv)) {
-        stop("The type", enumerate_items(grep("^iv", type, value = TRUE), "s.is"), " only valid for IV estimations.")
+        stop("The type", dreamerr::enumerate_items(grep("^iv", type, value = TRUE), "s.is"), " only valid for IV estimations.")
     }
 
     dreamerr::check_arg(subset, "logical scalar | character vector no na")
@@ -540,7 +429,7 @@ sparse_model_matrix = function(object, data, type = "rhs", na.rm = TRUE,  collin
     isFormula = FALSE
     split_fml = NULL
     if ("formula" %in% class(object)) {
-      split_fml = fml_split_internal(object)
+      split_fml = fixest:::fml_split_internal(object)
 
       if (collin.rm == TRUE | length(split_fml) == 3) {
         message("Formula passed to sparse_model_matrix with collin.rm == TRUE or iv. Estimating feols with formula.")
@@ -570,7 +459,7 @@ sparse_model_matrix = function(object, data, type = "rhs", na.rm = TRUE,  collin
       fake_intercept = !is.null(split_fml[[2]]) | fml_0
 
     } else {
-      fml_linear = formula(object, type = "linear")
+      fml_linear = stats::formula(object, type = "linear")
 
       # we kick out the intercept if there is presence of fixed-effects
       fml_0 = attr(stats::terms(fml_linear), "intercept") == 0
@@ -596,10 +485,10 @@ sparse_model_matrix = function(object, data, type = "rhs", na.rm = TRUE,  collin
 
         if (isFormula && (length(split_fml) == 3)) {
           fml_iv = split_fml[[3]]
-          fml = .xpd(..lhs ~ ..endo + ..rhs, ..lhs = fml[[2]], ..endo = fml_iv[[2]], ..rhs = fml[[3]])
+          fml = fixest:::.xpd(..lhs ~ ..endo + ..rhs, ..lhs = fml[[2]], ..endo = fml_iv[[2]], ..rhs = fml[[3]])
         } else if (isTRUE(object$iv)) {
           fml_iv = object$fml_all$iv
-          fml = .xpd(..lhs ~ ..endo + ..rhs, ..lhs = fml[[2]], ..endo = fml_iv[[2]], ..rhs = fml[[3]])
+          fml = fixest:::.xpd(..lhs ~ ..endo + ..rhs, ..lhs = fml[[2]], ..endo = fml_iv[[2]], ..rhs = fml[[3]])
         }
 
         vars = attr(stats::terms(fml), "term.labels")
@@ -618,7 +507,7 @@ sparse_model_matrix = function(object, data, type = "rhs", na.rm = TRUE,  collin
         } else { 
 
             if (isFormula) {
-              fixef_fml = .xpd(~ ..fe, ..fe = split_fml[[2]])
+              fixef_fml = fixest:::.xpd(~ ..fe, ..fe = split_fml[[2]])
             } else {
               fixef_fml = object$fml_all$fixef
             }
@@ -790,7 +679,7 @@ sparse_model_matrix = function(object, data, type = "rhs", na.rm = TRUE,  collin
         fml = object$fml
         if (object$iv_stage == 2) {
             fml_iv = object$fml_all$iv
-            fml = .xpd(..lhs ~ ..inst + ..rhs, ..lhs = fml[[2]], ..inst = fml_iv[[3]], ..rhs = fml[[3]])
+            fml = fixest:::.xpd(..lhs ~ ..inst + ..rhs, ..lhs = fml[[2]], ..inst = fml_iv[[3]], ..rhs = fml[[3]])
         }
         vars = attr(stats::terms(fml), "term.labels")
         iv_rhs1 = vars_to_sparse_mat(vars = vars, data = data, object = object, collin.rm = collin.rm, type = "iv.rhs1", add_intercept = !fake_intercept)
@@ -810,12 +699,12 @@ sparse_model_matrix = function(object, data, type = "rhs", na.rm = TRUE,  collin
         fit_vars = c()
         for (i in seq_along(stage_1)) {
             fit_vars[i] = v = paste0("fit_", names(stage_1)[i])
-            data[[v]] = predict(stage_1[[i]], newdata = data, sample = "original")
+            data[[v]] = stats::predict(stage_1[[i]], newdata = data, sample = "original")
         }
 
         # II) we create the variables
         fml = object$fml
-        fml = .xpd(..lhs ~ ..fit + ..rhs, ..lhs = fml[[2]], ..fit = fit_vars, ..rhs = fml[[3]])
+        fml = fixest:::.xpd(..lhs ~ ..fit + ..rhs, ..lhs = fml[[2]], ..fit = fit_vars, ..rhs = fml[[3]])
         vars = attr(stats::terms(fml), "term.labels")
         iv_rhs2 = vars_to_sparse_mat(vars = vars, data = data, object = object, collin.rm = collin.rm, type = "iv.rhs2", add_intercept = !fake_intercept)
 
@@ -1055,7 +944,7 @@ vars_to_sparse_mat = function(vars, data, collin.rm = FALSE, object = NULL, type
     }
 
     # TODO: Should I use error_sender?
-    # mat = error_sender(fixest_model_matrix_extra(
+    # mat = fixest:::error_sender(fixest_model_matrix_extra(
     #     object = object, newdata = data, original_data = original_data,
     #     fml = fml, fake_intercept = fake_intercept,
     #     subset = subset),
@@ -1079,7 +968,7 @@ vars_to_sparse_mat = function(vars, data, collin.rm = FALSE, object = NULL, type
           endo = fml_iv[[2]]
 
           # Trick to get the rhs variables as a character vector
-          endo = .xpd(~ ..endo, ..endo = endo)
+          endo = fixest:::.xpd(~ ..endo, ..endo = endo)
           endo = attr(stats::terms(endo), "term.labels")
 
           exo = lapply(object$iv_first_stage, function(x) names(stats::coef(x)))

diff --git a/README.Rmd b/README.Rmd
@@ -10,7 +10,8 @@ knitr::opts_chunk$set(
   collapse = TRUE,
   comment = "#>",
   fig.path = "man/figures/README-",
-  out.width = "100%"
+  out.width = "100%",
+  dpi = 300
 )
 ```
 
@@ -77,7 +78,7 @@ df_het = as.data.frame(df_het)
 
 Here is a plot of the average outcome variable for each of the groups:
 
-```{r plot-df-het, fig.width=8, fig.height=4, fig.cap="Example data with heterogeneous treatment effects"}
+```{r plot-df-het, fig.width=8, fig.height=4, dpi=300, fig.cap="Example data with heterogeneous treatment effects"}
 # Mean for treatment group-year
 agg <- aggregate(df_het$dep_var, by = list(g = df_het$g, year = df_het$year), FUN = mean)
 
@@ -136,7 +137,7 @@ es <- did2s(df_het,
 
 And plot the results:
 
-```{r plot-es, fig.cap="Event-study plot with example data"}
+```{r plot-es, fig.cap="Event-study plot with example data", fig.width=8, fig.height=5, dpi=300}
 fixest::iplot(es, main = "Event study: Staggered treatment", xlab = "Relative time to treatment", col = "steelblue", ref.line = -0.5)
 
 # Add the (mean) true effects
@@ -153,7 +154,7 @@ legend(
 
 ### Comparison to TWFE
 
-```{r plot-compare, ig.cap="TWFE and Two-Stage estimates of Event-Study"}
+```{r plot-compare, fig.cap="TWFE and Two-Stage estimates of Event-Study", fig.width=8, fig.height=5, dpi=300}
 twfe <- feols(dep_var ~ i(rel_year, ref = c(-1, Inf)) | unit + year, data = df_het)
 
 fixest::iplot(list(es, twfe),
@@ -178,7 +179,7 @@ In version 1.1.0, we added support for computing a sensitivity analysis using th
 
 Here's an example using data from [here](https://github.com/Mixtape-Sessions/Advanced-DID/tree/main/Exercises/Exercise-1). The provided dataset `ehec_data.dta` contains a state-level panel dataset on health insurance coverage and Medicaid expansion. The variable `dins` shows the share of low-income childless adults with health insurance in the state. The variable `yexp2` gives the year that a state expanded Medicaid coverage under the Affordable Care Act, and is missing if the state never expanded.
 
-```{r ehec-data-est, fig.cap="Estimates of the effect of Medicaid expansion on health insurance coverage"}
+```{r ehec-data-est, fig.cap="Estimates of the effect of Medicaid expansion on health insurance coverage", fig.width=8, fig.height=5, dpi=300}
 library(HonestDiD)
 library(ggplot2)
 
@@ -200,7 +201,7 @@ es_did2s <- did2s(
 coefplot(es_did2s)
 ```
 
-```{r sensitivity, fig.cap="Sensitivity analysis for the example data"}
+```{r sensitivity, fig.cap="Sensitivity analysis for the example data", fig.width=8, fig.height=5, dpi=300}
 # Relative Magnitude sensitivity analysis
 sensitivity_results <- es_did2s |> 
   # Take fixest obj and convert for `honest_did_did2s`
@@ -222,7 +223,7 @@ HonestDiD::createSensitivityPlot_relativeMagnitudes(
 
 ## Event-study plot
 
-```{r}
+```{r, fig.cap="Multiple event-study estimators", fig.width=12, fig.height=8.5, dpi=300}
 library(tidyverse)
 data(df_het)
 df = df_het

diff --git a/README.md b/README.md
@@ -229,7 +229,14 @@ legend(
 )
 ```
 
-<img src="man/figures/README-plot-compare-1.png" width="100%" />
+<div class="figure">
+
+<img src="man/figures/README-plot-compare-1.png" alt="TWFE and Two-Stage estimates of Event-Study" width="100%" />
+<p class="caption">
+TWFE and Two-Stage estimates of Event-Study
+</p>
+
+</div>
 
 ### Honest DID
 
@@ -354,7 +361,14 @@ multiple_ests = did2s::event_study(
 did2s::plot_event_study(multiple_ests)
 ```
 
-<img src="man/figures/README-unnamed-chunk-3-1.png" width="100%" />
+<div class="figure">
+
+<img src="man/figures/README-unnamed-chunk-3-1.png" alt="Multiple event-study estimators" width="100%" />
+<p class="caption">
+Multiple event-study estimators
+</p>
+
+</div>
 
 # Citation
 

diff --git a/man/figures/README-ehec-data-est-1.png b/man/figures/README-ehec-data-est-1.png
diff --git a/man/figures/README-plot-compare-1.png b/man/figures/README-plot-compare-1.png
diff --git a/man/figures/README-plot-df-het-1.png b/man/figures/README-plot-df-het-1.png
diff --git a/man/figures/README-plot-es-1.png b/man/figures/README-plot-es-1.png
diff --git a/man/figures/README-sensitivity-1.png b/man/figures/README-sensitivity-1.png
diff --git a/man/figures/README-unnamed-chunk-3-1.png b/man/figures/README-unnamed-chunk-3-1.png