diff --git a/.gitignore b/.gitignore
index 4674983..702d007 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,4 @@
 /reference
 /graphs
 /output/*
+*_reprex.*
diff --git a/R/constructing_interaction_graphs.R b/R/constructing_interaction_graphs.R
new file mode 100644
index 0000000..7e969b5
--- /dev/null
+++ b/R/constructing_interaction_graphs.R
@@ -0,0 +1,64 @@
+library(docxtractr)
+library(tidyverse)
+library(patchwork)
+
+subg_doc <- read_docx("data/ate_paper_subgroups.docx")
+
+subg_tables <- subg_doc$tbls |> 
+  imap(\(tbl, i) {
+    
+    docx_extract_tbl(subg_doc, i) |> 
+      filter(row_number() < n())
+    
+  })
+
+interaction_terms <- subg_tables |> 
+  reduce(bind_rows) |> 
+  janitor::clean_names() |> 
+  filter(!is.na(interaction_term)) |> 
+  separate(interaction_term, into = c("axis", "subgroup"), sep = ":") |> 
+  select(outcome, axis, subgroup, effect_size, standard_error, x95_ci, subgroup_unweighted_n) |> 
+  separate(x95_ci, into = c("li", "ul"), sep = ", ", convert = TRUE) |> 
+  mutate(effect_size = as.numeric(effect_size),
+         standard_error = as.numeric(standard_error),
+         outcome = fct_inorder(outcome))
+
+
+interaction_plots <- interaction_terms |>
+  nest(results = -axis) |>
+  mutate(gp = map2(results, axis, \(data, title) {
+    
+    effect_labs <- data |>
+      mutate(label = glue::glue(
+        "{effect} ({lower}, {upper})",
+        effect = sprintf("%.2f", effect_size),
+        lower = sprintf("%.2f", li),
+        upper = sprintf("%.2f", ul)
+      )) |> 
+      ggplot(aes(1, fct_rev(outcome), label = label, colour = subgroup)) +
+      geom_text(position = position_dodge(width = 0.5), fontface ="bold") +
+      theme_void() +
+      scale_colour_brewer(palette = "Set1") +
+      theme(legend.position = "none")
+    
+    main_plot <- ggplot(data, aes(effect_size, fct_rev(outcome), colour = subgroup)) + 
+      geom_point(position = position_dodge(width = 0.5)) + 
+      geom_errorbar(aes(xmin = li, xmax = ul), width = 0.3, position = position_dodge(width = 0.5)) +
+      geom_vline(xintercept = 0, linetype = "dashed") +
+      scale_y_discrete("Well-being outcome") +
+      scale_colour_brewer("", palette = "Set1", guide = guide_legend(reverse = TRUE)) +
+      theme_minimal() +
+      scale_x_continuous("Effect estimate (95%CI)") +
+      theme(legend.position = "bottom", text = element_text(size = 12))
+    
+    main_plot + effect_labs + plot_layout(widths = c(5, 2)) +
+      plot_annotation(title = glue::glue("Interaction effects by {title}"))
+  }))
+
+interaction_plots$gp[1]
+
+interaction_plots$gp |> 
+  iwalk(\(plot, number) ggsave(glue::glue("graphs/SPAplot{number}.png"), plot, width = 150, height = 120, units = "mm"))
+
+
+ggsave(interaction_plots$gp[12][[1]], filename = ggsave("graphs/SPAplot12.png"), width = 150, height = 150, units = "mm")
diff --git a/R/estimating_comparable_effects.R b/R/estimating_comparable_effects.R
new file mode 100644
index 0000000..d8474ae
--- /dev/null
+++ b/R/estimating_comparable_effects.R
@@ -0,0 +1,130 @@
+library(tidyverse)
+library(lubridate)
+library(fst)
+
+
+pre_post_covid <- c(
+  "pre_c" = "data/fst_files/apr19_mar20.fst",
+  "post_c" = "data/fst_files/apr20_mar21.fst"
+) |> map(read_fst, c(
+  "satis", 
+  "anxious", 
+  "happy", 
+  "worth", 
+  "pwta20",
+  "refdte"),
+  as.data.table = TRUE)
+
+pre_post_dat <- pre_post_covid |> 
+  imap(~mutate(.x, period = .y)) |> 
+  reduce(bind_rows) |> 
+  mutate(across(satis:worth, ~ ifelse(.x == -9|.x == -8, NA, .x)),
+         Date = floor_date(dmy(refdte), "months"),
+         Month = factor(month(Date)),
+         Year = factor(year(Date)),
+         period = fct_inorder(period)) 
+
+pre_post_dat |> 
+  pivot_longer(satis:worth, names_to = "metric", values_to = "Score") |> 
+  filter(!is.na(Score)) |> 
+  ggplot(aes(period, Score)) +
+  stat_summary(geom = "pointrange", shape = 15,
+               fun.data = mean_se) +
+  facet_wrap(~metric, scale = "free_y")
+
+library(fixest)
+
+tibble(outcome = c("satis", "happy", "worth", "anxious")) |> 
+  mutate(model = map(outcome, \(outcome) {
+    
+    feols(as.formula(paste(outcome, "~ period | Month")), data = pre_post_dat, weights = ~pwta20)
+    
+  })) |> 
+  mutate(coefs = map(model, broom::tidy, conf.int = TRUE)) |> 
+  unnest(coefs) |> 
+  filter(term == "periodpost_c")
+
+
+pre_post_dat |> 
+  as_tibble() |> 
+  pivot_longer(satis:worth, names_to = "outcome", values_to = "score") |> 
+  ggplot(aes(Date, score)) +
+  stat_summary(geom = "line", fun = mean) +
+  stat_summary(fun.data = mean_cl_normal, aes(colour = Year)) +
+  geom_vline(xintercept = ymd("20200401")) +
+  facet_wrap(~outcome, scales = "free_y")
+
+# March is start of exposure effects (anticipatory)
+
+# longer lead-in ----------------------------------------------------------
+
+
+apr11_mar21 <- c(
+  `2013` = "data/fst_files/apr13_mar14.fst",
+  `2014` = "data/fst_files/apr14_mar15.fst",
+  `2015` = "data/fst_files/apr15_mar16.fst",
+  `2016` = "data/fst_files/apr16_mar17.fst",
+  `2017` = "data/fst_files/apr17_mar18.fst",
+  `2018` = "data/fst_files/apr18_mar19.fst"
+) |> map(read_fst, c(
+  "satis", 
+  "anxious", 
+  "happy", 
+  "worth", 
+  "refdte",
+  "pwta18"),
+  as.data.table = TRUE) |> 
+  c(
+    c(
+  `2019` = "data/fst_files/apr19_mar20.fst",
+  `2020` = "data/fst_files/apr20_mar21.fst"
+) |> map(read_fst, c(
+  "satis", 
+  "anxious", 
+  "happy", 
+  "worth", 
+  "refdte",
+  "pwta20"),
+  as.data.table = TRUE) 
+  )
+
+
+
+
+longer_data <- apr11_mar21 |> 
+  imap(~mutate(.x, period = .y)) |> 
+  map(rename_with, \(x) str_replace(x, "pwta\\d{2}", "weight")) |> 
+  reduce(bind_rows) |> 
+  mutate(across(satis:worth, ~ ifelse(.x == -9|.x == -8, NA, .x)),
+         Date = floor_date(dmy(refdte), "months"),
+         Month = factor(month(Date)),
+         Year = factor(year(Date)),
+         period = fct_inorder(period) |> as.integer(),
+         covid = as.integer(Date >= ymd("20200301"))) 
+
+longer_data |> 
+pivot_longer(satis:worth, names_to = "metric", values_to = "Score") |> 
+  filter(!is.na(Score)) |> 
+  ggplot(aes(Year, Score)) +
+  stat_summary(geom = "pointrange", shape = 15,
+               fun.data = mean_se) +
+  facet_wrap(~metric, scale = "free_y")
+
+tibble(outcome = c("satis", "happy", "worth", "anxious")) |> 
+  mutate(model = map(outcome, \(outcome) {
+    
+    feols(as.formula(paste(outcome, "~ covid + Year | Month")), data = longer_data, weights = ~weight)
+    
+  })) |> 
+  mutate(coefs = map(model, broom::tidy, conf.int = TRUE)) |> 
+  unnest(coefs) |> 
+  filter(term == "covid")
+
+read_table("
+UC	COVID
+-0.66	-0.23
+-0.41	-0.23
+-0.73	-0.12
++0.79	+0.43
+") |> 
+  mutate(mult = UC/COVID)
diff --git a/R/full_data_test_diffs.R b/R/full_data_test_diffs.R
index 1425dcb..f843d75 100644
--- a/R/full_data_test_diffs.R
+++ b/R/full_data_test_diffs.R
@@ -2,7 +2,6 @@ library(fst)
 library(tidyverse)
 library(data.table)
 library(survey)
-library(SPHSUgraphs)
 
 
 satis_income_df <-
diff --git a/R/graphing_comparative_results.R b/R/graphing_comparative_results.R
new file mode 100644
index 0000000..9a3faa6
--- /dev/null
+++ b/R/graphing_comparative_results.R
@@ -0,0 +1,33 @@
+library(tidyverse)
+
+results <- tribble(
+  ~ metric, ~text,
+  "Life Satisfaction", "-0.066 point drop in life satisfaction (-0.087 to -0.044)",
+  "Happiness", "-0.068 point drop in happiness (-0.094 to -0.042)",
+  "Anxiety", "0.065 point increase in anxiety (0.031 to 0.098)",
+  "Life Worthwhile", "-0.074 point drop in life being worthwhile (-0.095 to -0.053)",
+  "Life Satisfaction", "-0.12 point drop in life satisfaction (-0.15 to -0.09)",
+  "Happiness", "-0.11 point drop in happiness (-0.15 to -0.07)",
+  "Anxiety", "0.12 point increase in anxiety (0.07 to 0.17)",
+  "Life Worthwhile", "-0.12 point drop in life being worthwhile (-0.15 to -0.09)"
+) |> 
+  mutate(
+    estimate = str_extract(text, "-?0\\.\\d*") |> as.numeric(),
+    cl = str_extract(text, "(?<=\\()-?0\\.\\d*") |> as.numeric(),
+    cu = str_extract(text, "(?<=to )-?0\\.\\d*") |> as.numeric(),
+    time = c(rep("Within 1y", 4), rep(">20% threshold", 4))
+  )
+
+results |> 
+  ggplot(aes(estimate, metric, colour = time)) +
+  geom_point(position = position_dodge(width = 0.5)) +
+  geom_linerange(aes(xmin = cl, xmax = cu), position = position_dodge(width = 0.5)) +
+  geom_vline(xintercept = 0) +
+  SPHSUgraphs::theme_sphsu_light() +
+  SPHSUgraphs::scale_colour_sphsu()
+
+
+results |> 
+  mutate(prop_rec = if_else(time == "Within 1y", 0.041, 0.099),
+         scaled_eff = estimate / prop_rec) |> 
+  select(metric, time, scaled_eff)
diff --git a/R/graphing_respondents_conditions.R b/R/graphing_respondents_conditions.R
index a090957..8e0bca6 100644
--- a/R/graphing_respondents_conditions.R
+++ b/R/graphing_respondents_conditions.R
@@ -72,7 +72,7 @@ apr14_mar21_dt <- apr14_mar21 |>
   mutate(date = floor_date(dmy(refdte), "months")) |> 
   data.table()
 
-apr14_mar21_dt <- apr14_mar21_dt[age > 17 & age < 66]
+apr14_mar21_dt <- apr14_mar21_dt[age >= 16 & age <= 64]
 
 apr14_mar21_dt[,uc := rowSums(.SD == 1), .SD = 2:11]
 apr14_mar21_dt[,tax_cr := rowSums(.SD == 3), .SD = 2:11]
@@ -137,8 +137,8 @@ patch +
                   subtitle = "April 2014 - March 2021")
 
 
-ggsave(filename = "graphs/proporrions_benefit_type_weighted.png", width = 20, height = 24,
-       units = "cm", dpi = 400)
+# ggsave(filename = "graphs/proporrions_benefit_type_weighted.png", width = 20, height = 24,
+       # units = "cm", dpi = 400)
 
 
 (n_claim <- apr14_mar21_dt |> 
@@ -214,4 +214,4 @@ comb_uc_rec |>
   plot_annotation(title = "Proportions of working-age adults reporting receiving UC vs observed rates",
                   subtitle = "April 2014 - March 2021")
 
-ggsave(filename = "graphs/comparison_uc_reporting.png", width = 20, height = 10, dpi = 400, units = "cm")
+# ggsave(filename = "graphs/comparison_uc_reporting.png", width = 20, height = 10, dpi = 400, units = "cm")
diff --git a/R/power_calcs.R b/R/power_calcs.R
new file mode 100644
index 0000000..cabe7fb
--- /dev/null
+++ b/R/power_calcs.R
@@ -0,0 +1,47 @@
+library(fst)
+
+aps_2021 <- read_fst("data/fst_files/apr20_mar21.fst")
+library(tidyverse)
+
+satis_income_df <-
+  c(
+    "data/fst_files/apr11_mar12.fst",
+    "data/fst_files/apr12_mar13.fst",
+    "data/fst_files/apr13_mar14.fst",
+    "data/fst_files/apr14_mar15.fst",
+    "data/fst_files/apr15_mar16.fst",
+    "data/fst_files/apr16_mar17.fst",
+    "data/fst_files/apr17_mar18.fst",
+    "data/fst_files/apr18_mar19.fst",
+    "data/fst_files/apr19_mar20.fst",
+    "data/fst_files/apr20_mar21.fst"
+  ) |> map(read_fst, columns = c("refdte", "age", "satis"))
+
+# df <- read_fst("data/fst_files/apr19_mar20.fst")
+
+names(satis_income_df) <- 2011:2020
+
+stats <- tibble(year = as.character(2011:2020), data = satis_income_df[year]) |> 
+  unnest(data) |> 
+  mutate(satis = na_if(satis, -9)) |> 
+  filter(age < 20, !is.na(satis)) |> 
+  summarise(mean = mean(satis), sd = sd(satis))
+
+library(pwr)
+
+pwr.t.test(
+  n = 385000,
+  d = 0.05 / stats$sd,
+  sig.level = 0.01
+)
+
+stats$mean
+
+
+pwr.t.test(d = 0.2, power = 0.8, sig.level = 0.05)
+
+aps_2021 |> 
+  as_tibble() |> 
+  select(age, satis) |> 
+  filter(age < 20, !is.na(satis)) |> 
+  summarise(mean = mean(satis), sd = sd(satis))
diff --git a/R/recreating_main_graph.R b/R/recreating_main_graph.R
new file mode 100644
index 0000000..d951c92
--- /dev/null
+++ b/R/recreating_main_graph.R
@@ -0,0 +1,96 @@
+library(tidyverse)
+library(docxtractr)
+library(patchwork)
+
+main_doc <- read_docx("data/ate_paper_outputs.docx")
+
+main_tables <- main_doc$tbls |> 
+  imap(\(tbl, i) {
+    
+    docx_extract_tbl(main_doc, i) |> 
+      janitor::clean_names() |> 
+      filter(row_number()< n())
+    
+  })
+
+
+life_sat <- main_tables[[7]]
+happy <- main_tables[[13]]
+life_worth <- main_tables[[19]]
+anxiety <- main_tables[[25]]
+
+plotting_data <- list(
+  "Life Satisfaction" = life_sat,
+  "Happiness" = happy,
+  "Life Worthwhile" = life_worth,
+  "Anxiety" = anxiety
+) |> 
+  imap(~mutate(.x, outcome = .y)) |> 
+  reduce(bind_rows) |> 
+  separate_wider_delim(x95_ci, delim = ", ", names = c("ll", "ul")) |> 
+  mutate(across(estimate:ul, as.numeric), outcome = fct_inorder(outcome) |> fct_rev())
+
+effect_labs <- plotting_data |>
+  mutate(label = glue::glue(
+    "{effect} ({lower}, {upper})",
+    effect = sprintf("%.2f", estimate),
+    lower = sprintf("%.2f", ll),
+    upper = sprintf("%.2f", ul)
+  )) |> 
+  ggplot(aes(1, outcome, label = label)) +
+  geom_text(position = position_dodge(width = 0.5), fontface ="bold") +
+  theme_void() +
+  theme(legend.position = "none")
+
+main_plot <- plotting_data |> 
+  ggplot(aes(estimate, outcome)) + 
+  geom_point(position = position_dodge(width = 0.5)) + 
+  geom_errorbar(aes(xmin = ll, xmax = ul), width = 0.3, position = position_dodge(width = 0.5)) +
+  geom_vline(xintercept = 0, linetype = "dashed") +
+  scale_y_discrete("Well-being outcome") +
+  theme_minimal() +
+  scale_x_continuous("Effect estimate (95%CI)") +
+  theme(legend.position = "bottom", text = element_text(size = 12))
+
+main_plot + effect_labs + plot_layout(widths = c(5, 2)) +
+  plot_annotation(caption = "Unweighted N = 245,658")
+
+ggsave("graphs/main_outcome.svg", height = 10.7*1.2, width = 17.2*1.2, units = "cm")
+ggsave("graphs/main_outcome.tiff", height = 10.7*1.2, width = 17.2*1.2, units = "cm", compression = "lzw", dpi = 600)
+
+
+
+# event study plots -------------------------------------------------------
+
+
+life_sat <- main_tables[[8]]
+happy <- main_tables[[14]]
+life_worth <- main_tables[[20]]
+anxiety <- main_tables[[26]]
+
+plotting_data <- list(
+  "Life Satisfaction" = life_sat,
+  "Happiness" = happy,
+  "Life Worthwhile" = life_worth,
+  "Anxiety" = anxiety
+) |> 
+  imap(~mutate(.x, outcome = .y)) |> 
+  reduce(bind_rows) |> 
+  separate_wider_delim(x95_ci, delim = ", ", names = c("ll", "ul")) |> 
+  mutate(across(-outcome, as.numeric), outcome = fct_inorder(outcome) |> fct_rev())
+
+
+plotting_data |> 
+  ggplot(aes(quarter_relative_to_uc_rollout, estimate)) +
+  geom_vline(xintercept = -0.5, linetype = "dashed", colour = "red") +
+  geom_hline(yintercept = 0, colour = "grey") +
+  geom_point() +
+  geom_linerange(aes(ymin = ll, ymax = ul)) +
+  facet_wrap(~outcome, scales = "free_y") +
+  scale_x_continuous("Quarter relative to UC rollout") +
+  theme_minimal() +
+  scale_y_continuous("Effect estimate (95%CI)") +
+  labs(caption = "Unweighted N = 245,658")
+
+ggsave("graphs/event_study_plots.svg", height = 15*1.2, width = 19*1.2, units = "cm")
+ggsave("graphs/event_study_plots.tiff", height = 15*1.2, width = 19*1.2, units = "cm", compression = "lzw", dpi = 600)
diff --git a/R/subgroup graphing.R b/R/subgroup graphing.R
new file mode 100644
index 0000000..a90480c
--- /dev/null
+++ b/R/subgroup graphing.R	
@@ -0,0 +1,69 @@
+library(tidyverse)
+library(officer)
+library(docxtractr)
+library(SPHSUgraphs)
+library(patchwork)
+
+theme_set(theme_sphsu_light())
+
+subgroup_doc <- read_docx("data/ate_subgroups_differencing_effects.docx")
+
+tables <- map(1:24, ~docx_extract_tbl(subgroup_doc, tbl_number = .x))
+
+end_table <- docx_extract_tbl(subgroup_doc, tbl_number = 25)
+
+library(readxl)
+
+end_table <- read_xlsx("data/concat_subgroups.xlsx", skip = 2, col_types = c(
+  "text", 
+  "text",
+  "numeric",
+  "text",
+  "numeric",
+  "text",
+  "numeric",
+  "text",
+  "numeric",
+  "text"
+  ))
+
+subgroup_interactions <- end_table |> 
+  filter(!is.na(Comparison)) |> 
+  pivot_longer(
+    -c(Subgroup, Comparison), 
+    names_to = c("Metric", ".value"),
+    names_sep = "_"
+    ) |> 
+  separate_wider_delim(ends_with("95% CI"), " to ", names = c("ll", "ul")) |> 
+  mutate(across(c(ll, ul), as.numeric))
+
+subgroup_interactions |> 
+  mutate(se = (ul - ll)/(2 * 1.96)) |> 
+  rowwise() |> 
+  mutate(p = 2 * pnorm(-abs(estimate), sd = se, lower.tail = TRUE),
+         signif = p < 0.05) |> 
+  ggplot(aes(Metric, Comparison, fill = signif)) +
+  geom_tile()
+
+subgroup_interactions |> 
+  mutate(Metric = fct_inorder(Metric) |> fct_rev()) |> 
+  nest(data = -Subgroup) |> 
+  mutate(graph = map2(data, Subgroup, \(df, subg) {
+    
+    df |> 
+      ggplot(aes(estimate, Metric, colour = Comparison)) +
+      geom_point(position = position_dodge(width = 0.5)) +
+      geom_errorbar(aes(xmin = ll, xmax = ul), width = 0.2, 
+                    position = position_dodge(width = 0.5)) +
+      geom_vline(xintercept = 0, linetype = "dotted") +
+      scale_color_brewer(name = subg, palette = "Set1") +
+        facet_wrap(~Comparison) +
+      theme_minimal()
+    
+  })) |> 
+  group_by(Subgroup) |> 
+  group_walk(\(df, subg) {
+    
+    ggsave(plot = df$graph[[1]], filename = glue::glue("graphs/subg_{subg}.png"))
+    
+  })
diff --git a/R/test_trends.R b/R/test_trends.R
new file mode 100644
index 0000000..c4c1a1a
--- /dev/null
+++ b/R/test_trends.R
@@ -0,0 +1,148 @@
+library(fst)
+library(tidyverse)
+library(data.table)
+
+cols_present <- read_fst("data/fst_files/apr17_mar18.fst", to  = 10)
+
+cols_present$npwt18
+
+wb_df <-
+  c(
+    # "data/fst_files/apr11_mar12.fst",
+    # "data/fst_files/apr12_mar13.fst",
+    # "data/fst_files/apr13_mar14.fst",
+    "data/fst_files/apr14_mar15.fst",
+    "data/fst_files/apr15_mar16.fst",
+    "data/fst_files/apr16_mar17.fst",
+    "data/fst_files/apr17_mar18.fst",
+    "data/fst_files/apr18_mar19.fst"
+    # "data/fst_files/apr19_mar20.fst"
+    # "data/fst_files/apr20_mar21.fst"
+  ) |> map(\(file_name) {
+    read_fst(file_name,
+             columns = c("age", 
+                         "satis", 
+                         "worth", 
+                         "anxious", 
+                         "happy", 
+                         "statr", 
+                         "ilodefr", 
+                         "refdte",
+                         "npwt18",
+                         "gor9d"))
+  })
+
+# df <- read_fst("data/fst_files/apr19_mar20.fst")
+
+names(wb_df) <- 2014:2018
+
+tidy_df <- enframe(wb_df, "datayear", "dat") |> 
+  unnest(dat) |> 
+  mutate(
+    Date = floor_date(dmy(refdte), "months"),
+    across(satis:happy, ~na_if(.x, -9)),
+    month = month(Date) + (year(Date) - 2014) * 12,
+    q = quarter(Date) + (year(Date) - 2014) * 4
+    )
+
+library(did)
+library(magrittr)
+library(plm)
+
+pseudo_rollouts <- tidy_df |>
+  filter(year(Date) > 2014) |> 
+  select(gor9d, rollout = month, r_q = q) |> 
+  slice_sample(n = 1, by = gor9d)
+
+tidy_df <- tidy_df |> 
+  left_join(pseudo_rollouts, by = join_by(gor9d)) |> 
+  mutate(lag_rollout = factor(month - rollout),
+         lag_q = factor(q - r_q),
+         id = as.numeric(as.factor(gor9d)))
+
+
+# twfe model --------------------------------------------------------------
+
+model_twfe <- tidy_df |> 
+  mutate(uc = (q <= r_q) * 1) %$%
+  lm(anxious ~ uc + poly(age, 3) + ilodefr + datayear + gor9d, weights = npwt18)
+
+broom::tidy(model_twfe)
+
+
+# fixest model ------------------------------------------------------------
+
+library(fixest)
+
+model_feols <- tidy_df |> 
+  mutate(uc = (q <= r_q) * 1) |> 
+  feols(anxious ~ uc + poly(age, 3) + ilodefr + datayear | r_q + gor9d,
+        data = _,
+        weights = ~npwt18)
+
+summary(model_feols)
+
+# panel_model -------------------------------------------------------------
+
+
+panel_model <-  plm(satis ~ lag_q, data = tidy_df, model = "within", effect = "twoways", index = c("gor9d", "q"))
+
+
+summary(panel_model)
+
+
+# some housekeeping for making the plot
+# add 0 at event time -1
+coefs <- coef(panel_model)
+ses <- sqrt(diag(summary(panel_model)$vcov))
+# coefs <- c(coefs1[idx.pre], 0, coefs1[idx.post])
+# ses <- c(ses1[idx.pre], 0, ses1[idx.post])
+exposure <- -24:23
+
+cmat <- data.frame(coefs=coefs, ses=ses, exposure=exposure)
+
+library(ggplot2)
+
+ggplot(data = cmat, mapping = aes(y = coefs, x = exposure)) +
+  geom_line(linetype = "dashed") +
+  geom_point() + 
+  geom_errorbar(aes(ymin = (coefs-1.96*ses), ymax = (coefs+1.96*ses)), width = 0.2) +
+  ylim(c(-2, 5)) +
+  theme_bw()
+
+# Using did methods
+
+did_att_gt <- att_gt(yname = "anxious",
+                     tname = "q",
+                     idname = "id",
+                     gname = "rollout",
+                     xformla = ~ ilodefr,
+                     data = tidy_df,
+                     control_group = "notyettreated",
+                     clustervars = "gor9d",
+                     weightsname = "npwt18",
+                     bstrap = TRUE,
+                     cband = FALSE,
+                     panel = FALSE)
+
+
+summary(did_att_gt)
+
+# aggregate them into event study plot
+did_es <- aggte(did_att_gt, type = "dynamic")
+
+# plot the event study
+ggdid(did_es)
+
+aggte(did_att_gt, type = "simple")
+
+conditional_did_pretest(
+  yname = "anxious",
+  tname = "month",
+  gname = "rollout",
+  idname = "id",
+  xformla = ~ilodefr,
+  data = tidy_df |> filter(month < max(rollout)),
+  panel = FALSE,
+  bstrap = FALSE
+)
diff --git a/R/testing_income_distribution.R b/R/testing_income_distribution.R
new file mode 100644
index 0000000..32afc57
--- /dev/null
+++ b/R/testing_income_distribution.R
@@ -0,0 +1,97 @@
+library(fst)
+library(data.table)
+library(tidyverse)
+library(dtplyr)
+library(lubridate)
+library(SPHSUgraphs)
+library(patchwork)
+
+theme_set(theme_sphsu_light())
+
+apr14_mar21 <- c(
+  "data/fst_files/apr14_mar15.fst",
+  "data/fst_files/apr15_mar16.fst",
+  "data/fst_files/apr16_mar17.fst",
+  "data/fst_files/apr17_mar18.fst",
+  "data/fst_files/apr18_mar19.fst",
+  "data/fst_files/apr19_mar20.fst",
+  "data/fst_files/apr20_mar21.fst"
+) |> map(read_fst, c(
+  "grsswk",
+  "refdte"))
+
+
+apr14_mar21 |>
+  reduce(bind_rows) |>
+  mutate(grsswk = if_else(grsswk < 0, 0, grsswk)) |>
+  ggplot(aes(grsswk)) +
+  geom_histogram(aes(y = after_stat(count / sum(count))))
+
+apr14_mar21 |>
+  reduce(bind_rows) |> 
+  filter(grsswk == max(grsswk)) |> 
+  mutate(income = grsswk * 52)
+
+apr14_mar21 |>
+  reduce(bind_rows) |>
+  mutate(
+    income = cut(
+      grsswk,
+      breaks = c(-9, -8, 0, seq(1, 170, length.out = 5), seq(231, max(grsswk), length.out = 15)),
+      right = FALSE,
+      include.lowest = TRUE
+    ),
+      included = factor(
+        case_when(
+          grsswk < 0 ~ "Missing (included by default)",
+          grsswk > 230 ~ "High income",
+          .default = "Low income"
+        )
+      ),
+  ) |> 
+  ggplot(aes(income, fill = included)) +
+  stat_count(geom = "bar") +
+  scale_fill_discrete("") +
+  scale_x_discrete("Annual income in employment", labels = c("-9", "-8", ">£0", rep("", 4), ">£12,000", rep("", 12), "£41,000+"))
+  
+# all income --------------------------------------------------------------
+
+
+top_100 <- read_fst("data/fst_files/apr14_mar15.fst", 
+         to = 100)
+
+top_100 |> 
+  select(starts_with("band"), starts_with("gr"), starts_with("inc"), starts_with("net")) |> names() |> dput()
+
+
+earnings_21 <- read_fst("data/fst_files/apr20_mar21.fst", 
+                    columns = c("bandg", "bandg2", "bandn", "bandn2", "gross99", "grsexp", 
+                             "grsprd", "grsswk", "grsswk2", "incnow", "incsup", "net99", "netprd", 
+                             "netwk", "netwk2"))
+
+
+earnings_nas <- earnings_21 |> 
+  tibble() |> 
+  mutate(across(.fns = as.numeric)) |> 
+  mutate(across(.fns = ~ if_else(.x == -9, NA_real_, .x))) 
+
+earnings_nas |> 
+  summarise(across(.fns = ~sum(is.na(.x))/n()))
+
+earnings_nas |> 
+  select(gross99, grsswk, incnow, net99) |> 
+  pivot_longer(
+    everything(),
+    names_to = "metric",
+    values_to = "value"
+  ) |> 
+  replace_na(list(value = 0)) |> 
+  ggplot(aes(value)) +
+  geom_histogram() +
+  facet_wrap(~metric)
+
+ukmod_income <- fread(file = "data/ukmod_out/uk_2019_UCAon.txt")
+
+ukmod_income |> 
+  ggplot(aes(yem)) +
+  geom_histogram()
diff --git a/R/testing_sarima_on_wellbeing.R b/R/testing_sarima_on_wellbeing.R
index d109960..83d3180 100644
--- a/R/testing_sarima_on_wellbeing.R
+++ b/R/testing_sarima_on_wellbeing.R
@@ -1,5 +1,6 @@
 library(tidyverse)
 library(lubridate)
+library(data.table)
 library(fst)
 
 apr11_mar21 <- c(
@@ -79,8 +80,8 @@ plot(diff(diff(bare$score, 12)), type = "l")
 
 acf2(diff(diff(bare$score, 12)))
 
-mod1 <- auto.arima(bare$score[1:50], 
-           # xreg = as.matrix(c("exp", "t_exp")]),
+mod1 <- auto.arima(bare$score, 
+           xreg = as.matrix(bare[c("exp", "t_exp")]),
            approximation = FALSE, allowdrift = FALSE,
            seasonal = TRUE, trace = TRUE)
 
diff --git a/reports/outline_paper_parts.Rmd b/reports/outline_paper_parts.Rmd
index 8b101c0..7ff5d25 100644
--- a/reports/outline_paper_parts.Rmd
+++ b/reports/outline_paper_parts.Rmd
@@ -15,6 +15,10 @@ editor_options:
     wrap: 72
 ---
 
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(warning = FALSE, echo = FALSE, message = FALSE, fig.height=8.0, fig.width=12.0, compression="lzw", dev="tiff", dpi=500, out.width=900)
+```
+
 # Abstract
 
 #### Background
@@ -266,10 +270,9 @@ x% of respondents. The variable with the highest rate of missingness was
 earned income, with data missing for x% of eligible respondents. We
 generated [20] imputed datasets.
 
-We additionally carried out a 'complete
-case' analysis of all observations with complete data. We compared
-effect estimates between complete and imputed samples to examine
-robustness of findings.
+We additionally carried out a 'complete case' analysis of all
+observations with complete data. We compared effect estimates between
+complete and imputed samples to examine robustness of findings.
 
 ## Statistical analysis
 
@@ -292,6 +295,139 @@ and standard error term for each quarterly period before and after
 introduction of Universal Credit to examine timings and patters of
 effects.
 
+# Graphing conditions across exposure period
+
+```{r claim_patterns}
+library(fst)
+library(data.table)
+library(tidyverse)
+library(dtplyr)
+library(lubridate)
+library(SPHSUgraphs)
+library(patchwork)
+
+theme_set(theme_sphsu_light())
+
+here::i_am("reports/outline_paper_parts.Rmd")
+
+apr14_mar21 <- c(
+  here::here("data", "fst_files", "apr14_mar15.fst"),
+  here::here("data", "fst_files", "apr15_mar16.fst"),
+  here::here("data", "fst_files", "apr16_mar17.fst"),
+  here::here("data", "fst_files", "apr17_mar18.fst"),
+  here::here("data", "fst_files", "apr18_mar19.fst"),
+  here::here("data", "fst_files", "apr19_mar20.fst"),
+  here::here("data", "fst_files", "apr20_mar21.fst")
+) |> map(read_fst, c(
+  "age",
+  "tpbn1301", 
+  "tpbn1302", 
+  "tpbn1303", 
+  "tpbn1304", 
+  "tpbn1305", 
+  "tpbn1306", 
+  "tpbn1307", 
+  "tpbn1308", 
+  "tpbn1309", 
+  "tpbn1310", 
+  "refdte"))
+
+wts1 <- c(
+  here::here("data", "fst_files", "apr14_mar15.fst"),
+  here::here("data", "fst_files", "apr15_mar16.fst"),
+  here::here("data", "fst_files", "apr16_mar17.fst"),
+  here::here("data", "fst_files", "apr17_mar18.fst"),
+  here::here("data", "fst_files", "apr18_mar19.fst")
+  # "data/fst_files/apr19_mar20.fst"
+  # "data/fst_files/apr20_mar21.fst"
+) |> map(read_fst, "pwta18") |> 
+  map(`colnames<-`, "weight")
+
+wts2 <- c(here::here("data", "fst_files", "apr19_mar20.fst"),
+          here::here("data", "fst_files", "apr20_mar21.fst")) |>
+  map(read_fst, "pwta20") |> 
+  map(`colnames<-`, "weight")
+
+
+n_uc <- apr14_mar21 |> 
+  map2(append(wts1, wts2), cbind) |> 
+  reduce(bind_rows) |> 
+  mutate(date = floor_date(dmy(refdte), "months"),
+         across(tpbn1301:tpbn1310, ~.x == 1)) |> 
+  rowwise() |> 
+         mutate(uc = sum(tpbn1301:tpbn1310)*weight)
+
+apr14_mar21_dt <- apr14_mar21 |> 
+  map2(append(wts1, wts2), cbind) |> 
+  reduce(bind_rows) |> 
+  mutate(date = floor_date(dmy(refdte), "months")) |> 
+  data.table()
+
+apr14_mar21_dt <- apr14_mar21_dt[age > 17 & age < 66]
+
+apr14_mar21_dt[,uc := rowSums(.SD == 1), .SD = 2:11]
+apr14_mar21_dt[,tax_cr := rowSums(.SD == 3), .SD = 2:11]
+apr14_mar21_dt[,housing := rowSums(.SD == 2), .SD = 2:11]
+apr14_mar21_dt[,income_s := rowSums(.SD == 4), .SD = 2:11]
+apr14_mar21_dt[,jsa := rowSums(.SD == 5), .SD = 2:11]
+apr14_mar21_dt[,disab := rowSums(.SD == 6), .SD = 2:11]
+apr14_mar21_dt[,other := as.numeric(rowSums(.SD)>0), .SD = tax_cr:jsa]
+
+perc_other <- apr14_mar21_dt |> 
+  as_tibble() |> 
+  group_by(date) |>
+  select(date, other, weight) |> 
+  summarise(prop_other = sum(other * weight)/sum(weight)) |> 
+  ggplot(aes(date, prop_other)) +
+  geom_col(fill = sphsu_cols("Pumpkin"), width = 32) +
+  scale_x_date("Year") +
+  scale_y_continuous("Respondents on\nLegacy Benefits", labels = scales::percent,
+                     expand = expansion(mult = c(0, 0.05)))
+
+perc_all <- apr14_mar21_dt |> 
+  as_tibble() |> 
+  group_by(date) |>
+  select(date, uc, weight) |> 
+  summarise(prop_uc = sum(uc * weight)/sum(weight)) |> 
+  ggplot(aes(date, prop_uc)) +
+  geom_col(fill = sphsu_cols("University Blue"), width = 32, position = "identity") +
+  scale_x_date("Year") +
+  scale_y_continuous("Respondents on UC", labels = scales::percent,
+                     expand = expansion(mult = c(0, 0.05)))
+
+perc_claim <- apr14_mar21_dt |> 
+  group_by(date) |>
+  select(date, uc, other, weight) |> 
+  filter(uc == 1 | other == 1) |> 
+  mutate(benefits = case_when(
+    uc == 1 & other == 1 ~ "Combination",
+    uc == 1 ~ "Universal Credit",
+    other == 1 ~ "Legacy Benefits"
+  ),
+  benefits = factor(benefits, levels = c("Legacy Benefits", "Combination", "Universal Credit"))) |> 
+  as_tibble() |> 
+    # group_by(date, benefits) |> 
+    # summarise(n = sum(weight)) |> 
+  ggplot(aes(date, weight, fill = benefits)) +
+  # geom_col(aes(y = weight), width = 32, position = "fill") +
+    stat_summary(fun = sum, position = "fill", geom = "bar", width = 32) +
+  # stat_sum(aes(y = weight), position = "fill", geom = "bar", width = 31) +
+  # stat_count(position = "fill", geom = "bar", aes(y = after_stat(count)), width = 31, position = "identity") +
+  scale_fill_manual(name = "Benefits claimed", values = sphsu_cols("Pumpkin", "Leaf", "University Blue", names = FALSE)) +
+  scale_x_date("Year") +
+  theme(legend.position = "bottom") +
+    guides(size = guide_none()) +
+  scale_y_continuous("Claimant by benefit types", labels = scales::percent,
+                     expand = expansion(mult = c(0, 0.05)))
+
+
+patch <- (perc_other/perc_all/perc_claim) & theme(panel.grid.major.x = element_blank(),
+                              panel.grid.minor.x = element_blank()) 
+patch +
+  plot_annotation(title = "Proportions of working-age adults claiming benefit types",
+                  subtitle = "April 2014 - March 2021")
+```
+
 \newpage
 
 # References
diff --git a/reports/template_doc.docx b/reports/template_doc.docx
index 3bb70b4..85362e2 100644
Binary files a/reports/template_doc.docx and b/reports/template_doc.docx differ