diff --git a/.gitignore b/.gitignore index 4674983..702d007 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ /reference /graphs /output/* +*_reprex.* diff --git a/R/constructing_interaction_graphs.R b/R/constructing_interaction_graphs.R new file mode 100644 index 0000000..7e969b5 --- /dev/null +++ b/R/constructing_interaction_graphs.R @@ -0,0 +1,64 @@ +library(docxtractr) +library(tidyverse) +library(patchwork) + +subg_doc <- read_docx("data/ate_paper_subgroups.docx") + +subg_tables <- subg_doc$tbls |> + imap(\(tbl, i) { + + docx_extract_tbl(subg_doc, i) |> + filter(row_number() < n()) + + }) + +interaction_terms <- subg_tables |> + reduce(bind_rows) |> + janitor::clean_names() |> + filter(!is.na(interaction_term)) |> + separate(interaction_term, into = c("axis", "subgroup"), sep = ":") |> + select(outcome, axis, subgroup, effect_size, standard_error, x95_ci, subgroup_unweighted_n) |> + separate(x95_ci, into = c("li", "ul"), sep = ", ", convert = TRUE) |> + mutate(effect_size = as.numeric(effect_size), + standard_error = as.numeric(standard_error), + outcome = fct_inorder(outcome)) + + +interaction_plots <- interaction_terms |> + nest(results = -axis) |> + mutate(gp = map2(results, axis, \(data, title) { + + effect_labs <- data |> + mutate(label = glue::glue( + "{effect} ({lower}, {upper})", + effect = sprintf("%.2f", effect_size), + lower = sprintf("%.2f", li), + upper = sprintf("%.2f", ul) + )) |> + ggplot(aes(1, fct_rev(outcome), label = label, colour = subgroup)) + + geom_text(position = position_dodge(width = 0.5), fontface ="bold") + + theme_void() + + scale_colour_brewer(palette = "Set1") + + theme(legend.position = "none") + + main_plot <- ggplot(data, aes(effect_size, fct_rev(outcome), colour = subgroup)) + + geom_point(position = position_dodge(width = 0.5)) + + geom_errorbar(aes(xmin = li, xmax = ul), width = 0.3, position = position_dodge(width = 0.5)) + + geom_vline(xintercept = 0, linetype = "dashed") + + scale_y_discrete("Well-being outcome") + + scale_colour_brewer("", palette = "Set1", guide = guide_legend(reverse = TRUE)) + + theme_minimal() + + scale_x_continuous("Effect estimate (95%CI)") + + theme(legend.position = "bottom", text = element_text(size = 12)) + + main_plot + effect_labs + plot_layout(widths = c(5, 2)) + + plot_annotation(title = glue::glue("Interaction effects by {title}")) + })) + +interaction_plots$gp[1] + +interaction_plots$gp |> + iwalk(\(plot, number) ggsave(glue::glue("graphs/SPAplot{number}.png"), plot, width = 150, height = 120, units = "mm")) + + +ggsave(interaction_plots$gp[12][[1]], filename = ggsave("graphs/SPAplot12.png"), width = 150, height = 150, units = "mm") diff --git a/R/estimating_comparable_effects.R b/R/estimating_comparable_effects.R new file mode 100644 index 0000000..d8474ae --- /dev/null +++ b/R/estimating_comparable_effects.R @@ -0,0 +1,130 @@ +library(tidyverse) +library(lubridate) +library(fst) + + +pre_post_covid <- c( + "pre_c" = "data/fst_files/apr19_mar20.fst", + "post_c" = "data/fst_files/apr20_mar21.fst" +) |> map(read_fst, c( + "satis", + "anxious", + "happy", + "worth", + "pwta20", + "refdte"), + as.data.table = TRUE) + +pre_post_dat <- pre_post_covid |> + imap(~mutate(.x, period = .y)) |> + reduce(bind_rows) |> + mutate(across(satis:worth, ~ ifelse(.x == -9|.x == -8, NA, .x)), + Date = floor_date(dmy(refdte), "months"), + Month = factor(month(Date)), + Year = factor(year(Date)), + period = fct_inorder(period)) + +pre_post_dat |> + pivot_longer(satis:worth, names_to = "metric", values_to = "Score") |> + filter(!is.na(Score)) |> + ggplot(aes(period, Score)) + + stat_summary(geom = "pointrange", shape = 15, + fun.data = mean_se) + + facet_wrap(~metric, scale = "free_y") + +library(fixest) + +tibble(outcome = c("satis", "happy", "worth", "anxious")) |> + mutate(model = map(outcome, \(outcome) { + + feols(as.formula(paste(outcome, "~ period | Month")), data = pre_post_dat, weights = ~pwta20) + + })) |> + mutate(coefs = map(model, broom::tidy, conf.int = TRUE)) |> + unnest(coefs) |> + filter(term == "periodpost_c") + + +pre_post_dat |> + as_tibble() |> + pivot_longer(satis:worth, names_to = "outcome", values_to = "score") |> + ggplot(aes(Date, score)) + + stat_summary(geom = "line", fun = mean) + + stat_summary(fun.data = mean_cl_normal, aes(colour = Year)) + + geom_vline(xintercept = ymd("20200401")) + + facet_wrap(~outcome, scales = "free_y") + +# March is start of exposure effects (anticipatory) + +# longer lead-in ---------------------------------------------------------- + + +apr11_mar21 <- c( + `2013` = "data/fst_files/apr13_mar14.fst", + `2014` = "data/fst_files/apr14_mar15.fst", + `2015` = "data/fst_files/apr15_mar16.fst", + `2016` = "data/fst_files/apr16_mar17.fst", + `2017` = "data/fst_files/apr17_mar18.fst", + `2018` = "data/fst_files/apr18_mar19.fst" +) |> map(read_fst, c( + "satis", + "anxious", + "happy", + "worth", + "refdte", + "pwta18"), + as.data.table = TRUE) |> + c( + c( + `2019` = "data/fst_files/apr19_mar20.fst", + `2020` = "data/fst_files/apr20_mar21.fst" +) |> map(read_fst, c( + "satis", + "anxious", + "happy", + "worth", + "refdte", + "pwta20"), + as.data.table = TRUE) + ) + + + + +longer_data <- apr11_mar21 |> + imap(~mutate(.x, period = .y)) |> + map(rename_with, \(x) str_replace(x, "pwta\\d{2}", "weight")) |> + reduce(bind_rows) |> + mutate(across(satis:worth, ~ ifelse(.x == -9|.x == -8, NA, .x)), + Date = floor_date(dmy(refdte), "months"), + Month = factor(month(Date)), + Year = factor(year(Date)), + period = fct_inorder(period) |> as.integer(), + covid = as.integer(Date >= ymd("20200301"))) + +longer_data |> +pivot_longer(satis:worth, names_to = "metric", values_to = "Score") |> + filter(!is.na(Score)) |> + ggplot(aes(Year, Score)) + + stat_summary(geom = "pointrange", shape = 15, + fun.data = mean_se) + + facet_wrap(~metric, scale = "free_y") + +tibble(outcome = c("satis", "happy", "worth", "anxious")) |> + mutate(model = map(outcome, \(outcome) { + + feols(as.formula(paste(outcome, "~ covid + Year | Month")), data = longer_data, weights = ~weight) + + })) |> + mutate(coefs = map(model, broom::tidy, conf.int = TRUE)) |> + unnest(coefs) |> + filter(term == "covid") + +read_table(" +UC COVID +-0.66 -0.23 +-0.41 -0.23 +-0.73 -0.12 ++0.79 +0.43 +") |> + mutate(mult = UC/COVID) diff --git a/R/full_data_test_diffs.R b/R/full_data_test_diffs.R index 1425dcb..f843d75 100644 --- a/R/full_data_test_diffs.R +++ b/R/full_data_test_diffs.R @@ -2,7 +2,6 @@ library(fst) library(tidyverse) library(data.table) library(survey) -library(SPHSUgraphs) satis_income_df <- diff --git a/R/graphing_comparative_results.R b/R/graphing_comparative_results.R new file mode 100644 index 0000000..9a3faa6 --- /dev/null +++ b/R/graphing_comparative_results.R @@ -0,0 +1,33 @@ +library(tidyverse) + +results <- tribble( + ~ metric, ~text, + "Life Satisfaction", "-0.066 point drop in life satisfaction (-0.087 to -0.044)", + "Happiness", "-0.068 point drop in happiness (-0.094 to -0.042)", + "Anxiety", "0.065 point increase in anxiety (0.031 to 0.098)", + "Life Worthwhile", "-0.074 point drop in life being worthwhile (-0.095 to -0.053)", + "Life Satisfaction", "-0.12 point drop in life satisfaction (-0.15 to -0.09)", + "Happiness", "-0.11 point drop in happiness (-0.15 to -0.07)", + "Anxiety", "0.12 point increase in anxiety (0.07 to 0.17)", + "Life Worthwhile", "-0.12 point drop in life being worthwhile (-0.15 to -0.09)" +) |> + mutate( + estimate = str_extract(text, "-?0\\.\\d*") |> as.numeric(), + cl = str_extract(text, "(?<=\\()-?0\\.\\d*") |> as.numeric(), + cu = str_extract(text, "(?<=to )-?0\\.\\d*") |> as.numeric(), + time = c(rep("Within 1y", 4), rep(">20% threshold", 4)) + ) + +results |> + ggplot(aes(estimate, metric, colour = time)) + + geom_point(position = position_dodge(width = 0.5)) + + geom_linerange(aes(xmin = cl, xmax = cu), position = position_dodge(width = 0.5)) + + geom_vline(xintercept = 0) + + SPHSUgraphs::theme_sphsu_light() + + SPHSUgraphs::scale_colour_sphsu() + + +results |> + mutate(prop_rec = if_else(time == "Within 1y", 0.041, 0.099), + scaled_eff = estimate / prop_rec) |> + select(metric, time, scaled_eff) diff --git a/R/graphing_respondents_conditions.R b/R/graphing_respondents_conditions.R index a090957..8e0bca6 100644 --- a/R/graphing_respondents_conditions.R +++ b/R/graphing_respondents_conditions.R @@ -72,7 +72,7 @@ apr14_mar21_dt <- apr14_mar21 |> mutate(date = floor_date(dmy(refdte), "months")) |> data.table() -apr14_mar21_dt <- apr14_mar21_dt[age > 17 & age < 66] +apr14_mar21_dt <- apr14_mar21_dt[age >= 16 & age <= 64] apr14_mar21_dt[,uc := rowSums(.SD == 1), .SD = 2:11] apr14_mar21_dt[,tax_cr := rowSums(.SD == 3), .SD = 2:11] @@ -137,8 +137,8 @@ patch + subtitle = "April 2014 - March 2021") -ggsave(filename = "graphs/proporrions_benefit_type_weighted.png", width = 20, height = 24, - units = "cm", dpi = 400) +# ggsave(filename = "graphs/proporrions_benefit_type_weighted.png", width = 20, height = 24, + # units = "cm", dpi = 400) (n_claim <- apr14_mar21_dt |> @@ -214,4 +214,4 @@ comb_uc_rec |> plot_annotation(title = "Proportions of working-age adults reporting receiving UC vs observed rates", subtitle = "April 2014 - March 2021") -ggsave(filename = "graphs/comparison_uc_reporting.png", width = 20, height = 10, dpi = 400, units = "cm") +# ggsave(filename = "graphs/comparison_uc_reporting.png", width = 20, height = 10, dpi = 400, units = "cm") diff --git a/R/power_calcs.R b/R/power_calcs.R new file mode 100644 index 0000000..cabe7fb --- /dev/null +++ b/R/power_calcs.R @@ -0,0 +1,47 @@ +library(fst) + +aps_2021 <- read_fst("data/fst_files/apr20_mar21.fst") +library(tidyverse) + +satis_income_df <- + c( + "data/fst_files/apr11_mar12.fst", + "data/fst_files/apr12_mar13.fst", + "data/fst_files/apr13_mar14.fst", + "data/fst_files/apr14_mar15.fst", + "data/fst_files/apr15_mar16.fst", + "data/fst_files/apr16_mar17.fst", + "data/fst_files/apr17_mar18.fst", + "data/fst_files/apr18_mar19.fst", + "data/fst_files/apr19_mar20.fst", + "data/fst_files/apr20_mar21.fst" + ) |> map(read_fst, columns = c("refdte", "age", "satis")) + +# df <- read_fst("data/fst_files/apr19_mar20.fst") + +names(satis_income_df) <- 2011:2020 + +stats <- tibble(year = as.character(2011:2020), data = satis_income_df[year]) |> + unnest(data) |> + mutate(satis = na_if(satis, -9)) |> + filter(age < 20, !is.na(satis)) |> + summarise(mean = mean(satis), sd = sd(satis)) + +library(pwr) + +pwr.t.test( + n = 385000, + d = 0.05 / stats$sd, + sig.level = 0.01 +) + +stats$mean + + +pwr.t.test(d = 0.2, power = 0.8, sig.level = 0.05) + +aps_2021 |> + as_tibble() |> + select(age, satis) |> + filter(age < 20, !is.na(satis)) |> + summarise(mean = mean(satis), sd = sd(satis)) diff --git a/R/recreating_main_graph.R b/R/recreating_main_graph.R new file mode 100644 index 0000000..d951c92 --- /dev/null +++ b/R/recreating_main_graph.R @@ -0,0 +1,96 @@ +library(tidyverse) +library(docxtractr) +library(patchwork) + +main_doc <- read_docx("data/ate_paper_outputs.docx") + +main_tables <- main_doc$tbls |> + imap(\(tbl, i) { + + docx_extract_tbl(main_doc, i) |> + janitor::clean_names() |> + filter(row_number()< n()) + + }) + + +life_sat <- main_tables[[7]] +happy <- main_tables[[13]] +life_worth <- main_tables[[19]] +anxiety <- main_tables[[25]] + +plotting_data <- list( + "Life Satisfaction" = life_sat, + "Happiness" = happy, + "Life Worthwhile" = life_worth, + "Anxiety" = anxiety +) |> + imap(~mutate(.x, outcome = .y)) |> + reduce(bind_rows) |> + separate_wider_delim(x95_ci, delim = ", ", names = c("ll", "ul")) |> + mutate(across(estimate:ul, as.numeric), outcome = fct_inorder(outcome) |> fct_rev()) + +effect_labs <- plotting_data |> + mutate(label = glue::glue( + "{effect} ({lower}, {upper})", + effect = sprintf("%.2f", estimate), + lower = sprintf("%.2f", ll), + upper = sprintf("%.2f", ul) + )) |> + ggplot(aes(1, outcome, label = label)) + + geom_text(position = position_dodge(width = 0.5), fontface ="bold") + + theme_void() + + theme(legend.position = "none") + +main_plot <- plotting_data |> + ggplot(aes(estimate, outcome)) + + geom_point(position = position_dodge(width = 0.5)) + + geom_errorbar(aes(xmin = ll, xmax = ul), width = 0.3, position = position_dodge(width = 0.5)) + + geom_vline(xintercept = 0, linetype = "dashed") + + scale_y_discrete("Well-being outcome") + + theme_minimal() + + scale_x_continuous("Effect estimate (95%CI)") + + theme(legend.position = "bottom", text = element_text(size = 12)) + +main_plot + effect_labs + plot_layout(widths = c(5, 2)) + + plot_annotation(caption = "Unweighted N = 245,658") + +ggsave("graphs/main_outcome.svg", height = 10.7*1.2, width = 17.2*1.2, units = "cm") +ggsave("graphs/main_outcome.tiff", height = 10.7*1.2, width = 17.2*1.2, units = "cm", compression = "lzw", dpi = 600) + + + +# event study plots ------------------------------------------------------- + + +life_sat <- main_tables[[8]] +happy <- main_tables[[14]] +life_worth <- main_tables[[20]] +anxiety <- main_tables[[26]] + +plotting_data <- list( + "Life Satisfaction" = life_sat, + "Happiness" = happy, + "Life Worthwhile" = life_worth, + "Anxiety" = anxiety +) |> + imap(~mutate(.x, outcome = .y)) |> + reduce(bind_rows) |> + separate_wider_delim(x95_ci, delim = ", ", names = c("ll", "ul")) |> + mutate(across(-outcome, as.numeric), outcome = fct_inorder(outcome) |> fct_rev()) + + +plotting_data |> + ggplot(aes(quarter_relative_to_uc_rollout, estimate)) + + geom_vline(xintercept = -0.5, linetype = "dashed", colour = "red") + + geom_hline(yintercept = 0, colour = "grey") + + geom_point() + + geom_linerange(aes(ymin = ll, ymax = ul)) + + facet_wrap(~outcome, scales = "free_y") + + scale_x_continuous("Quarter relative to UC rollout") + + theme_minimal() + + scale_y_continuous("Effect estimate (95%CI)") + + labs(caption = "Unweighted N = 245,658") + +ggsave("graphs/event_study_plots.svg", height = 15*1.2, width = 19*1.2, units = "cm") +ggsave("graphs/event_study_plots.tiff", height = 15*1.2, width = 19*1.2, units = "cm", compression = "lzw", dpi = 600) diff --git a/R/subgroup graphing.R b/R/subgroup graphing.R new file mode 100644 index 0000000..a90480c --- /dev/null +++ b/R/subgroup graphing.R @@ -0,0 +1,69 @@ +library(tidyverse) +library(officer) +library(docxtractr) +library(SPHSUgraphs) +library(patchwork) + +theme_set(theme_sphsu_light()) + +subgroup_doc <- read_docx("data/ate_subgroups_differencing_effects.docx") + +tables <- map(1:24, ~docx_extract_tbl(subgroup_doc, tbl_number = .x)) + +end_table <- docx_extract_tbl(subgroup_doc, tbl_number = 25) + +library(readxl) + +end_table <- read_xlsx("data/concat_subgroups.xlsx", skip = 2, col_types = c( + "text", + "text", + "numeric", + "text", + "numeric", + "text", + "numeric", + "text", + "numeric", + "text" + )) + +subgroup_interactions <- end_table |> + filter(!is.na(Comparison)) |> + pivot_longer( + -c(Subgroup, Comparison), + names_to = c("Metric", ".value"), + names_sep = "_" + ) |> + separate_wider_delim(ends_with("95% CI"), " to ", names = c("ll", "ul")) |> + mutate(across(c(ll, ul), as.numeric)) + +subgroup_interactions |> + mutate(se = (ul - ll)/(2 * 1.96)) |> + rowwise() |> + mutate(p = 2 * pnorm(-abs(estimate), sd = se, lower.tail = TRUE), + signif = p < 0.05) |> + ggplot(aes(Metric, Comparison, fill = signif)) + + geom_tile() + +subgroup_interactions |> + mutate(Metric = fct_inorder(Metric) |> fct_rev()) |> + nest(data = -Subgroup) |> + mutate(graph = map2(data, Subgroup, \(df, subg) { + + df |> + ggplot(aes(estimate, Metric, colour = Comparison)) + + geom_point(position = position_dodge(width = 0.5)) + + geom_errorbar(aes(xmin = ll, xmax = ul), width = 0.2, + position = position_dodge(width = 0.5)) + + geom_vline(xintercept = 0, linetype = "dotted") + + scale_color_brewer(name = subg, palette = "Set1") + + facet_wrap(~Comparison) + + theme_minimal() + + })) |> + group_by(Subgroup) |> + group_walk(\(df, subg) { + + ggsave(plot = df$graph[[1]], filename = glue::glue("graphs/subg_{subg}.png")) + + }) diff --git a/R/test_trends.R b/R/test_trends.R new file mode 100644 index 0000000..c4c1a1a --- /dev/null +++ b/R/test_trends.R @@ -0,0 +1,148 @@ +library(fst) +library(tidyverse) +library(data.table) + +cols_present <- read_fst("data/fst_files/apr17_mar18.fst", to = 10) + +cols_present$npwt18 + +wb_df <- + c( + # "data/fst_files/apr11_mar12.fst", + # "data/fst_files/apr12_mar13.fst", + # "data/fst_files/apr13_mar14.fst", + "data/fst_files/apr14_mar15.fst", + "data/fst_files/apr15_mar16.fst", + "data/fst_files/apr16_mar17.fst", + "data/fst_files/apr17_mar18.fst", + "data/fst_files/apr18_mar19.fst" + # "data/fst_files/apr19_mar20.fst" + # "data/fst_files/apr20_mar21.fst" + ) |> map(\(file_name) { + read_fst(file_name, + columns = c("age", + "satis", + "worth", + "anxious", + "happy", + "statr", + "ilodefr", + "refdte", + "npwt18", + "gor9d")) + }) + +# df <- read_fst("data/fst_files/apr19_mar20.fst") + +names(wb_df) <- 2014:2018 + +tidy_df <- enframe(wb_df, "datayear", "dat") |> + unnest(dat) |> + mutate( + Date = floor_date(dmy(refdte), "months"), + across(satis:happy, ~na_if(.x, -9)), + month = month(Date) + (year(Date) - 2014) * 12, + q = quarter(Date) + (year(Date) - 2014) * 4 + ) + +library(did) +library(magrittr) +library(plm) + +pseudo_rollouts <- tidy_df |> + filter(year(Date) > 2014) |> + select(gor9d, rollout = month, r_q = q) |> + slice_sample(n = 1, by = gor9d) + +tidy_df <- tidy_df |> + left_join(pseudo_rollouts, by = join_by(gor9d)) |> + mutate(lag_rollout = factor(month - rollout), + lag_q = factor(q - r_q), + id = as.numeric(as.factor(gor9d))) + + +# twfe model -------------------------------------------------------------- + +model_twfe <- tidy_df |> + mutate(uc = (q <= r_q) * 1) %$% + lm(anxious ~ uc + poly(age, 3) + ilodefr + datayear + gor9d, weights = npwt18) + +broom::tidy(model_twfe) + + +# fixest model ------------------------------------------------------------ + +library(fixest) + +model_feols <- tidy_df |> + mutate(uc = (q <= r_q) * 1) |> + feols(anxious ~ uc + poly(age, 3) + ilodefr + datayear | r_q + gor9d, + data = _, + weights = ~npwt18) + +summary(model_feols) + +# panel_model ------------------------------------------------------------- + + +panel_model <- plm(satis ~ lag_q, data = tidy_df, model = "within", effect = "twoways", index = c("gor9d", "q")) + + +summary(panel_model) + + +# some housekeeping for making the plot +# add 0 at event time -1 +coefs <- coef(panel_model) +ses <- sqrt(diag(summary(panel_model)$vcov)) +# coefs <- c(coefs1[idx.pre], 0, coefs1[idx.post]) +# ses <- c(ses1[idx.pre], 0, ses1[idx.post]) +exposure <- -24:23 + +cmat <- data.frame(coefs=coefs, ses=ses, exposure=exposure) + +library(ggplot2) + +ggplot(data = cmat, mapping = aes(y = coefs, x = exposure)) + + geom_line(linetype = "dashed") + + geom_point() + + geom_errorbar(aes(ymin = (coefs-1.96*ses), ymax = (coefs+1.96*ses)), width = 0.2) + + ylim(c(-2, 5)) + + theme_bw() + +# Using did methods + +did_att_gt <- att_gt(yname = "anxious", + tname = "q", + idname = "id", + gname = "rollout", + xformla = ~ ilodefr, + data = tidy_df, + control_group = "notyettreated", + clustervars = "gor9d", + weightsname = "npwt18", + bstrap = TRUE, + cband = FALSE, + panel = FALSE) + + +summary(did_att_gt) + +# aggregate them into event study plot +did_es <- aggte(did_att_gt, type = "dynamic") + +# plot the event study +ggdid(did_es) + +aggte(did_att_gt, type = "simple") + +conditional_did_pretest( + yname = "anxious", + tname = "month", + gname = "rollout", + idname = "id", + xformla = ~ilodefr, + data = tidy_df |> filter(month < max(rollout)), + panel = FALSE, + bstrap = FALSE +) diff --git a/R/testing_income_distribution.R b/R/testing_income_distribution.R new file mode 100644 index 0000000..32afc57 --- /dev/null +++ b/R/testing_income_distribution.R @@ -0,0 +1,97 @@ +library(fst) +library(data.table) +library(tidyverse) +library(dtplyr) +library(lubridate) +library(SPHSUgraphs) +library(patchwork) + +theme_set(theme_sphsu_light()) + +apr14_mar21 <- c( + "data/fst_files/apr14_mar15.fst", + "data/fst_files/apr15_mar16.fst", + "data/fst_files/apr16_mar17.fst", + "data/fst_files/apr17_mar18.fst", + "data/fst_files/apr18_mar19.fst", + "data/fst_files/apr19_mar20.fst", + "data/fst_files/apr20_mar21.fst" +) |> map(read_fst, c( + "grsswk", + "refdte")) + + +apr14_mar21 |> + reduce(bind_rows) |> + mutate(grsswk = if_else(grsswk < 0, 0, grsswk)) |> + ggplot(aes(grsswk)) + + geom_histogram(aes(y = after_stat(count / sum(count)))) + +apr14_mar21 |> + reduce(bind_rows) |> + filter(grsswk == max(grsswk)) |> + mutate(income = grsswk * 52) + +apr14_mar21 |> + reduce(bind_rows) |> + mutate( + income = cut( + grsswk, + breaks = c(-9, -8, 0, seq(1, 170, length.out = 5), seq(231, max(grsswk), length.out = 15)), + right = FALSE, + include.lowest = TRUE + ), + included = factor( + case_when( + grsswk < 0 ~ "Missing (included by default)", + grsswk > 230 ~ "High income", + .default = "Low income" + ) + ), + ) |> + ggplot(aes(income, fill = included)) + + stat_count(geom = "bar") + + scale_fill_discrete("") + + scale_x_discrete("Annual income in employment", labels = c("-9", "-8", ">£0", rep("", 4), ">£12,000", rep("", 12), "£41,000+")) + +# all income -------------------------------------------------------------- + + +top_100 <- read_fst("data/fst_files/apr14_mar15.fst", + to = 100) + +top_100 |> + select(starts_with("band"), starts_with("gr"), starts_with("inc"), starts_with("net")) |> names() |> dput() + + +earnings_21 <- read_fst("data/fst_files/apr20_mar21.fst", + columns = c("bandg", "bandg2", "bandn", "bandn2", "gross99", "grsexp", + "grsprd", "grsswk", "grsswk2", "incnow", "incsup", "net99", "netprd", + "netwk", "netwk2")) + + +earnings_nas <- earnings_21 |> + tibble() |> + mutate(across(.fns = as.numeric)) |> + mutate(across(.fns = ~ if_else(.x == -9, NA_real_, .x))) + +earnings_nas |> + summarise(across(.fns = ~sum(is.na(.x))/n())) + +earnings_nas |> + select(gross99, grsswk, incnow, net99) |> + pivot_longer( + everything(), + names_to = "metric", + values_to = "value" + ) |> + replace_na(list(value = 0)) |> + ggplot(aes(value)) + + geom_histogram() + + facet_wrap(~metric) + +ukmod_income <- fread(file = "data/ukmod_out/uk_2019_UCAon.txt") + +ukmod_income |> + ggplot(aes(yem)) + + geom_histogram() diff --git a/R/testing_sarima_on_wellbeing.R b/R/testing_sarima_on_wellbeing.R index d109960..83d3180 100644 --- a/R/testing_sarima_on_wellbeing.R +++ b/R/testing_sarima_on_wellbeing.R @@ -1,5 +1,6 @@ library(tidyverse) library(lubridate) +library(data.table) library(fst) apr11_mar21 <- c( @@ -79,8 +80,8 @@ plot(diff(diff(bare$score, 12)), type = "l") acf2(diff(diff(bare$score, 12))) -mod1 <- auto.arima(bare$score[1:50], - # xreg = as.matrix(c("exp", "t_exp")]), +mod1 <- auto.arima(bare$score, + xreg = as.matrix(bare[c("exp", "t_exp")]), approximation = FALSE, allowdrift = FALSE, seasonal = TRUE, trace = TRUE) diff --git a/reports/outline_paper_parts.Rmd b/reports/outline_paper_parts.Rmd index 8b101c0..7ff5d25 100644 --- a/reports/outline_paper_parts.Rmd +++ b/reports/outline_paper_parts.Rmd @@ -15,6 +15,10 @@ editor_options: wrap: 72 --- +```{r setup, include=FALSE} +knitr::opts_chunk$set(warning = FALSE, echo = FALSE, message = FALSE, fig.height=8.0, fig.width=12.0, compression="lzw", dev="tiff", dpi=500, out.width=900) +``` + # Abstract #### Background @@ -266,10 +270,9 @@ x% of respondents. The variable with the highest rate of missingness was earned income, with data missing for x% of eligible respondents. We generated [20] imputed datasets. -We additionally carried out a 'complete -case' analysis of all observations with complete data. We compared -effect estimates between complete and imputed samples to examine -robustness of findings. +We additionally carried out a 'complete case' analysis of all +observations with complete data. We compared effect estimates between +complete and imputed samples to examine robustness of findings. ## Statistical analysis @@ -292,6 +295,139 @@ and standard error term for each quarterly period before and after introduction of Universal Credit to examine timings and patters of effects. +# Graphing conditions across exposure period + +```{r claim_patterns} +library(fst) +library(data.table) +library(tidyverse) +library(dtplyr) +library(lubridate) +library(SPHSUgraphs) +library(patchwork) + +theme_set(theme_sphsu_light()) + +here::i_am("reports/outline_paper_parts.Rmd") + +apr14_mar21 <- c( + here::here("data", "fst_files", "apr14_mar15.fst"), + here::here("data", "fst_files", "apr15_mar16.fst"), + here::here("data", "fst_files", "apr16_mar17.fst"), + here::here("data", "fst_files", "apr17_mar18.fst"), + here::here("data", "fst_files", "apr18_mar19.fst"), + here::here("data", "fst_files", "apr19_mar20.fst"), + here::here("data", "fst_files", "apr20_mar21.fst") +) |> map(read_fst, c( + "age", + "tpbn1301", + "tpbn1302", + "tpbn1303", + "tpbn1304", + "tpbn1305", + "tpbn1306", + "tpbn1307", + "tpbn1308", + "tpbn1309", + "tpbn1310", + "refdte")) + +wts1 <- c( + here::here("data", "fst_files", "apr14_mar15.fst"), + here::here("data", "fst_files", "apr15_mar16.fst"), + here::here("data", "fst_files", "apr16_mar17.fst"), + here::here("data", "fst_files", "apr17_mar18.fst"), + here::here("data", "fst_files", "apr18_mar19.fst") + # "data/fst_files/apr19_mar20.fst" + # "data/fst_files/apr20_mar21.fst" +) |> map(read_fst, "pwta18") |> + map(`colnames<-`, "weight") + +wts2 <- c(here::here("data", "fst_files", "apr19_mar20.fst"), + here::here("data", "fst_files", "apr20_mar21.fst")) |> + map(read_fst, "pwta20") |> + map(`colnames<-`, "weight") + + +n_uc <- apr14_mar21 |> + map2(append(wts1, wts2), cbind) |> + reduce(bind_rows) |> + mutate(date = floor_date(dmy(refdte), "months"), + across(tpbn1301:tpbn1310, ~.x == 1)) |> + rowwise() |> + mutate(uc = sum(tpbn1301:tpbn1310)*weight) + +apr14_mar21_dt <- apr14_mar21 |> + map2(append(wts1, wts2), cbind) |> + reduce(bind_rows) |> + mutate(date = floor_date(dmy(refdte), "months")) |> + data.table() + +apr14_mar21_dt <- apr14_mar21_dt[age > 17 & age < 66] + +apr14_mar21_dt[,uc := rowSums(.SD == 1), .SD = 2:11] +apr14_mar21_dt[,tax_cr := rowSums(.SD == 3), .SD = 2:11] +apr14_mar21_dt[,housing := rowSums(.SD == 2), .SD = 2:11] +apr14_mar21_dt[,income_s := rowSums(.SD == 4), .SD = 2:11] +apr14_mar21_dt[,jsa := rowSums(.SD == 5), .SD = 2:11] +apr14_mar21_dt[,disab := rowSums(.SD == 6), .SD = 2:11] +apr14_mar21_dt[,other := as.numeric(rowSums(.SD)>0), .SD = tax_cr:jsa] + +perc_other <- apr14_mar21_dt |> + as_tibble() |> + group_by(date) |> + select(date, other, weight) |> + summarise(prop_other = sum(other * weight)/sum(weight)) |> + ggplot(aes(date, prop_other)) + + geom_col(fill = sphsu_cols("Pumpkin"), width = 32) + + scale_x_date("Year") + + scale_y_continuous("Respondents on\nLegacy Benefits", labels = scales::percent, + expand = expansion(mult = c(0, 0.05))) + +perc_all <- apr14_mar21_dt |> + as_tibble() |> + group_by(date) |> + select(date, uc, weight) |> + summarise(prop_uc = sum(uc * weight)/sum(weight)) |> + ggplot(aes(date, prop_uc)) + + geom_col(fill = sphsu_cols("University Blue"), width = 32, position = "identity") + + scale_x_date("Year") + + scale_y_continuous("Respondents on UC", labels = scales::percent, + expand = expansion(mult = c(0, 0.05))) + +perc_claim <- apr14_mar21_dt |> + group_by(date) |> + select(date, uc, other, weight) |> + filter(uc == 1 | other == 1) |> + mutate(benefits = case_when( + uc == 1 & other == 1 ~ "Combination", + uc == 1 ~ "Universal Credit", + other == 1 ~ "Legacy Benefits" + ), + benefits = factor(benefits, levels = c("Legacy Benefits", "Combination", "Universal Credit"))) |> + as_tibble() |> + # group_by(date, benefits) |> + # summarise(n = sum(weight)) |> + ggplot(aes(date, weight, fill = benefits)) + + # geom_col(aes(y = weight), width = 32, position = "fill") + + stat_summary(fun = sum, position = "fill", geom = "bar", width = 32) + + # stat_sum(aes(y = weight), position = "fill", geom = "bar", width = 31) + + # stat_count(position = "fill", geom = "bar", aes(y = after_stat(count)), width = 31, position = "identity") + + scale_fill_manual(name = "Benefits claimed", values = sphsu_cols("Pumpkin", "Leaf", "University Blue", names = FALSE)) + + scale_x_date("Year") + + theme(legend.position = "bottom") + + guides(size = guide_none()) + + scale_y_continuous("Claimant by benefit types", labels = scales::percent, + expand = expansion(mult = c(0, 0.05))) + + +patch <- (perc_other/perc_all/perc_claim) & theme(panel.grid.major.x = element_blank(), + panel.grid.minor.x = element_blank()) +patch + + plot_annotation(title = "Proportions of working-age adults claiming benefit types", + subtitle = "April 2014 - March 2021") +``` + \newpage # References diff --git a/reports/template_doc.docx b/reports/template_doc.docx index 3bb70b4..85362e2 100644 Binary files a/reports/template_doc.docx and b/reports/template_doc.docx differ