diff --git a/README.Rmd b/README.Rmd index 58f26d8..c42c76e 100755 --- a/README.Rmd +++ b/README.Rmd @@ -130,7 +130,7 @@ Then, let's estimate an event study did. Note that relative year has a value of # Event Study es <- did2s(df_het, yname = "dep_var", first_stage = ~ 0 | state + year, - second_stage = ~ i(rel_year, ref = c(-1, Inf)), treatment = "treat", + second_stage = ~ i(rel_year, ref = Inf), treatment = "treat", cluster_var = "state" ) ``` @@ -138,7 +138,7 @@ es <- did2s(df_het, And plot the results: ```{r plot-es, fig.cap="Event-study plot with example data", fig.width=8, fig.height=5, dpi=300} -fixest::iplot(es, main = "Event study: Staggered treatment", xlab = "Relative time to treatment", col = "steelblue", ref.line = -0.5) +fixest::iplot(es, main = "Event study: Staggered treatment", xlab = "Relative time to treatment", col = "steelblue", ref.line = -0.5, drop = "Inf") # Add the (mean) true effects true_effects <- head(tapply((df_het$te + df_het$te_dynamic), df_het$rel_year, mean), -1) @@ -155,16 +155,17 @@ legend( ### Comparison to TWFE ```{r plot-compare, fig.cap="TWFE and Two-Stage estimates of Event-Study", fig.width=8, fig.height=5, dpi=300} -twfe <- feols(dep_var ~ i(rel_year, ref = c(-1, Inf)) | unit + year, data = df_het) +twfe <- feols(dep_var ~ i(rel_year, ref = c(Inf)) | unit + year, data = df_het) -fixest::iplot(list(es, twfe), +fixest::iplot( + list(es, twfe), sep = 0.2, ref.line = -0.5, col = c("steelblue", "#82b446"), pt.pch = c(20, 18), xlab = "Relative time to treatment", - main = "Event study: Staggered treatment (comparison)" + main = "Event study: Staggered treatment (comparison)", + drop = "Inf" ) - # Legend legend( x = -20, y = 3, col = c("steelblue", "#82b446"), pch = c(20, 18), @@ -198,7 +199,7 @@ es_did2s <- did2s( cluster_var = "stfips" ) -coefplot(es_did2s) +iplot(es_did2s, drop = "-100") ``` ```{r sensitivity, fig.cap="Sensitivity analysis for the example data", fig.width=8, fig.height=5, dpi=300} diff --git a/README.md b/README.md index 6254526..97eaa7e 100755 --- a/README.md +++ b/README.md @@ -83,6 +83,9 @@ library(did2s) #> year = {2021}, #> url = {https://journal.r-project.org/articles/RJ-2022-048/}, #> } +``` + +``` r # Load Data from R package data("df_het", package = "did2s") @@ -149,6 +152,9 @@ static <- did2s( #> - second stage formula `~ i(treat, ref = FALSE)` #> - The indicator variable that denotes when treatment is on is `treat` #> - Standard errors will be clustered by `state` +``` + +``` r fixest::etable(static) #> static @@ -174,12 +180,12 @@ second stage formula. # Event Study es <- did2s(df_het, yname = "dep_var", first_stage = ~ 0 | state + year, - second_stage = ~ i(rel_year, ref = c(-1, Inf)), treatment = "treat", + second_stage = ~ i(rel_year, ref = Inf), treatment = "treat", cluster_var = "state" ) #> Running Two-stage Difference-in-Differences #> - first stage formula `~ 0 | state + year` -#> - second stage formula `~ i(rel_year, ref = c(-1, Inf))` +#> - second stage formula `~ i(rel_year, ref = Inf)` #> - The indicator variable that denotes when treatment is on is `treat` #> - Standard errors will be clustered by `state` ``` @@ -187,7 +193,7 @@ es <- did2s(df_het, And plot the results: ``` r -fixest::iplot(es, main = "Event study: Staggered treatment", xlab = "Relative time to treatment", col = "steelblue", ref.line = -0.5) +fixest::iplot(es, main = "Event study: Staggered treatment", xlab = "Relative time to treatment", col = "steelblue", ref.line = -0.5, drop = "Inf") # Add the (mean) true effects true_effects <- head(tapply((df_het$te + df_het$te_dynamic), df_het$rel_year, mean), -1) @@ -212,16 +218,21 @@ Event-study plot with example data ### Comparison to TWFE ``` r -twfe <- feols(dep_var ~ i(rel_year, ref = c(-1, Inf)) | unit + year, data = df_het) +twfe <- feols(dep_var ~ i(rel_year, ref = c(Inf)) | unit + year, data = df_het) +#> The variable 'rel_year::20' has been removed because of collinearity (see $collin.var). +``` + +``` r -fixest::iplot(list(es, twfe), +fixest::iplot( + list(es, twfe), sep = 0.2, ref.line = -0.5, col = c("steelblue", "#82b446"), pt.pch = c(20, 18), xlab = "Relative time to treatment", - main = "Event study: Staggered treatment (comparison)" + main = "Event study: Staggered treatment (comparison)", + drop = "Inf" ) - # Legend legend( x = -20, y = 3, col = c("steelblue", "#82b446"), pch = c(20, 18), @@ -275,8 +286,11 @@ es_did2s <- did2s( #> - second stage formula `~ 0 + i(rel_year, ref = -100)` #> - The indicator variable that denotes when treatment is on is `treated` #> - Standard errors will be clustered by `stfips` +``` + +``` r -coefplot(es_did2s) +iplot(es_did2s, drop = "-100") ```
@@ -303,14 +317,15 @@ sensitivity_results <- es_did2s |> #> Warning in .ARP_computeCI(betahat = betahat, sigma = sigma, numPrePeriods = #> numPrePeriods, : CI is open at one of the endpoints; CI length may not be #> accurate - #> Warning in .ARP_computeCI(betahat = betahat, sigma = sigma, numPrePeriods = #> numPrePeriods, : CI is open at one of the endpoints; CI length may not be #> accurate - #> Warning in .ARP_computeCI(betahat = betahat, sigma = sigma, numPrePeriods = #> numPrePeriods, : CI is open at one of the endpoints; CI length may not be #> accurate +``` + +``` r # Create plot HonestDiD::createSensitivityPlot_relativeMagnitudes( @@ -341,6 +356,9 @@ library(tidyverse) #> ✖ dplyr::filter() masks stats::filter() #> ✖ dplyr::lag() masks stats::lag() #> ℹ Use the conflicted package () to force all conflicts to become errors +``` + +``` r data(df_het) df = df_het multiple_ests = did2s::event_study( @@ -358,6 +376,9 @@ multiple_ests = did2s::event_study( #> Estimating using Sun and Abraham (2020) #> Estimating using Borusyak, Jaravel, Spiess (2021) #> Estimating using Roth and Sant'Anna (2021) +``` + +``` r did2s::plot_event_study(multiple_ests) ``` diff --git a/man/figures/README-ehec-data-est-1.png b/man/figures/README-ehec-data-est-1.png index 8ad837a..c8aa435 100644 Binary files a/man/figures/README-ehec-data-est-1.png and b/man/figures/README-ehec-data-est-1.png differ diff --git a/man/figures/README-plot-compare-1.png b/man/figures/README-plot-compare-1.png index cd52b94..4b1f2e1 100644 Binary files a/man/figures/README-plot-compare-1.png and b/man/figures/README-plot-compare-1.png differ diff --git a/man/figures/README-plot-es-1.png b/man/figures/README-plot-es-1.png index 8807641..7d16232 100644 Binary files a/man/figures/README-plot-es-1.png and b/man/figures/README-plot-es-1.png differ diff --git a/man/figures/README-unnamed-chunk-3-1.png b/man/figures/README-unnamed-chunk-3-1.png index 8a8f36c..a70b353 100644 Binary files a/man/figures/README-unnamed-chunk-3-1.png and b/man/figures/README-unnamed-chunk-3-1.png differ diff --git a/vignettes/Two-Stage-Difference-in-Differences.Rmd b/vignettes/Two-Stage-Difference-in-Differences.Rmd index ed2d282..0b8f3d7 100755 --- a/vignettes/Two-Stage-Difference-in-Differences.Rmd +++ b/vignettes/Two-Stage-Difference-in-Differences.Rmd @@ -90,7 +90,7 @@ The main function is **`did2s()`**, which estimates the two-stage DiD procedure. - `yname`: The outcome variable. For example, `"y"`. - `first_stage`: Formula indicating the first stage. This can include fixed effects and covariates, but do not include treatment variable(s)! For efficiency, it is recommended to use the **fixest** convention of specifying fixed effects after a vertical bar. For example, `~ x1 + x2 | fe1 + fe2`. -- `second_stage`: Formula indicating the treatment variable or, in the case of event studies, treatment variables. Again, following **fixest** conventions, it is recommended to use the [`i()`](https://lrberge.github.io/fixest/reference/i.html) syntax. For example, `~ i(time_to_treatment, ref = 0)`. +- `second_stage`: Formula indicating the treatment variable or, in the case of event studies, treatment variables. Again, following **fixest** conventions, it is recommended to use the [`i()`](https://lrberge.github.io/fixest/reference/i.html) syntax. For example, `~ i(time_to_treatment)`. - `treatment`: A binary (1/0) or logical (TRUE/FALSE) variable demarcating when treatment turns on for a unit. For example, `"treated"`. Optional arguments include the ability to implement weighted regressions and whether to cluster or bootstrap standard errors. @@ -101,7 +101,7 @@ Let's walk through an example dataset from the package. ```{r load-data, code_folding=TRUE,} library(did2s) ## The main package. Will automatically load fixest as well. -library(ggplot2) +library(ggplot2) ## Load heterogeneous treatment dataset from the package data("df_het") @@ -111,9 +111,8 @@ head(df_het) Here is a plot of the average outcome variable for each of the groups: ```{r plot-df-het, fig.width=8, fig.height=4, fig.cap="Example data with heterogeneous treatment effects"} - # Mean for treatment group-year -agg <- aggregate(df_het$dep_var, by=list(g = df_het$g, year = df_het$year), FUN = mean) +agg <- aggregate(df_het$dep_var, by = list(g = df_het$g, year = df_het$year), FUN = mean) agg$g <- as.character(agg$g) agg$g <- ifelse(agg$g == "0", "Never Treated", agg$g) @@ -122,18 +121,19 @@ never <- agg[agg$g == "Never Treated", ] g1 <- agg[agg$g == "2000", ] g2 <- agg[agg$g == "2010", ] - -plot(0, 0, xlim = c(1990,2020), ylim = c(4,7.2), type = "n", - main = "Data-generating Process", ylab = "Outcome", xlab = "Year") +plot(0, 0, + xlim = c(1990, 2020), ylim = c(4, 7.2), type = "n", + main = "Data-generating Process", ylab = "Outcome", xlab = "Year" +) abline(v = c(1999.5, 2009.5), lty = 2) lines(never$year, never$x, col = "#8e549f", type = "b", pch = 15) lines(g1$year, g1$x, col = "#497eb3", type = "b", pch = 17) lines(g2$year, g2$x, col = "#d2382c", type = "b", pch = 16) -legend(x=1990, y=7.1, col = c("#8e549f", "#497eb3", "#d2382c"), - pch = c(15, 17, 16), - legend = c("Never Treated", "2000", "2010")) - - +legend( + x = 1990, y = 7.1, col = c("#8e549f", "#497eb3", "#d2382c"), + pch = c(15, 17, 16), + legend = c("Never Treated", "2000", "2010") +) ``` ### Estimate Two-stage Difference-in-Differences @@ -141,15 +141,14 @@ legend(x=1990, y=7.1, col = c("#8e549f", "#497eb3", "#d2382c"), First, lets estimate a static did. There are two things to note here. First, note that I can use `fixest::feols` formula including the `|` for specifying fixed effects and `fixest::i` for improved factor variable support. Second, note that `did2s` returns a `fixest` estimate object, so `fixest::esttable`, `fixest::coefplot`, and `fixest::iplot` all work as expected. ```{r static} - # Static -static <- did2s(df_het, - yname = "dep_var", first_stage = ~ 0 | state + year, - second_stage = ~i(treat, ref=FALSE), treatment = "treat", - cluster_var = "state") +static <- did2s(df_het, + yname = "dep_var", first_stage = ~ 0 | state + year, + second_stage = ~ i(treat, ref = FALSE), treatment = "treat", + cluster_var = "state" +) fixest::esttable(static) - ``` This is very close to the true treatment effect of ~2.23. @@ -157,48 +156,50 @@ This is very close to the true treatment effect of ~2.23. Then, let's estimate an event study did. Note that relative year has a value of `Inf` for never treated, so I put this as a reference in the second stage formula. ```{r event-study} - # Event Study es <- did2s(df_het, - yname = "dep_var", first_stage = ~ 0 | state + year, - second_stage = ~i(rel_year, ref=c(-1, Inf)), treatment = "treat", - cluster_var = "state") - + yname = "dep_var", first_stage = ~ 0 | state + year, + second_stage = ~ i(rel_year, ref = c(Inf)), treatment = "treat", + cluster_var = "state" +) ``` And plot the results: ```{r plot-es, fig.cap="Event-study plot with example data"} - -fixest::iplot(es, main = "Event study: Staggered treatment", xlab = "Relative time to treatment", col = "steelblue", ref.line = -0.5) +fixest::iplot(es, main = "Event study: Staggered treatment", xlab = "Relative time to treatment", col = "steelblue", ref.line = -0.5, drop = "Inf") # Add the (mean) true effects -true_effects = head(tapply((df_het$te + df_het$te_dynamic), df_het$rel_year, mean), -1) +true_effects <- head(tapply((df_het$te + df_het$te_dynamic), df_het$rel_year, mean), -1) points(-20:20, true_effects, pch = 20, col = "black") # Legend -legend(x=-20, y=3, col = c("steelblue", "black"), pch = c(20, 20), - legend = c("Two-stage estimate", "True effect")) - +legend( + x = -20, y = 3, col = c("steelblue", "black"), pch = c(20, 20), + legend = c("Two-stage estimate", "True effect") +) ``` ### Comparison to TWFE ```{r plot-compare, ig.cap="TWFE and Two-Stage estimates of Event-Study"} - -twfe = feols(dep_var ~ i(rel_year, ref=c(-1, Inf)) | unit + year, data = df_het) - -fixest::iplot(list(es, twfe), sep = 0.2, ref.line = -0.5, - col = c("steelblue", "#82b446"), pt.pch = c(20, 18), - xlab = "Relative time to treatment", - main = "Event study: Staggered treatment (comparison)") +twfe <- feols(dep_var ~ i(rel_year, ref = c(-1, Inf)) | unit + year, data = df_het) + +fixest::iplot(list(es, twfe), + sep = 0.2, ref.line = -0.5, + col = c("steelblue", "#82b446"), pt.pch = c(20, 18), + xlab = "Relative time to treatment", + main = "Event study: Staggered treatment (comparison)", + drop = "Inf" +) # Legend -legend(x=-20, y=3, col = c("steelblue", "#82b446"), pch = c(20, 18), - legend = c("Two-stage estimate", "TWFE")) - +legend( + x = -20, y = 3, col = c("steelblue", "#82b446"), pch = c(20, 18), + legend = c("Two-stage estimate", "TWFE") +) ``` # Citation