Skip to content

Commit

Permalink
Merge pull request #16 from MRC-CSO-SPHSU/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
andrewbaxter439 authored Oct 1, 2024
2 parents a29a5dc + 776c3e0 commit 9674e51
Show file tree
Hide file tree
Showing 14 changed files with 832 additions and 11 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@
/reference
/graphs
/output/*
*_reprex.*
64 changes: 64 additions & 0 deletions R/constructing_interaction_graphs.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
library(docxtractr)
library(tidyverse)
library(patchwork)

subg_doc <- read_docx("data/ate_paper_subgroups.docx")

subg_tables <- subg_doc$tbls |>
imap(\(tbl, i) {

docx_extract_tbl(subg_doc, i) |>
filter(row_number() < n())

})

interaction_terms <- subg_tables |>
reduce(bind_rows) |>
janitor::clean_names() |>
filter(!is.na(interaction_term)) |>
separate(interaction_term, into = c("axis", "subgroup"), sep = ":") |>
select(outcome, axis, subgroup, effect_size, standard_error, x95_ci, subgroup_unweighted_n) |>
separate(x95_ci, into = c("li", "ul"), sep = ", ", convert = TRUE) |>
mutate(effect_size = as.numeric(effect_size),
standard_error = as.numeric(standard_error),
outcome = fct_inorder(outcome))


interaction_plots <- interaction_terms |>
nest(results = -axis) |>
mutate(gp = map2(results, axis, \(data, title) {

effect_labs <- data |>
mutate(label = glue::glue(
"{effect} ({lower}, {upper})",
effect = sprintf("%.2f", effect_size),
lower = sprintf("%.2f", li),
upper = sprintf("%.2f", ul)
)) |>
ggplot(aes(1, fct_rev(outcome), label = label, colour = subgroup)) +
geom_text(position = position_dodge(width = 0.5), fontface ="bold") +
theme_void() +
scale_colour_brewer(palette = "Set1") +
theme(legend.position = "none")

main_plot <- ggplot(data, aes(effect_size, fct_rev(outcome), colour = subgroup)) +
geom_point(position = position_dodge(width = 0.5)) +
geom_errorbar(aes(xmin = li, xmax = ul), width = 0.3, position = position_dodge(width = 0.5)) +
geom_vline(xintercept = 0, linetype = "dashed") +
scale_y_discrete("Well-being outcome") +
scale_colour_brewer("", palette = "Set1", guide = guide_legend(reverse = TRUE)) +
theme_minimal() +
scale_x_continuous("Effect estimate (95%CI)") +
theme(legend.position = "bottom", text = element_text(size = 12))

main_plot + effect_labs + plot_layout(widths = c(5, 2)) +
plot_annotation(title = glue::glue("Interaction effects by {title}"))
}))

interaction_plots$gp[1]

interaction_plots$gp |>
iwalk(\(plot, number) ggsave(glue::glue("graphs/SPAplot{number}.png"), plot, width = 150, height = 120, units = "mm"))


ggsave(interaction_plots$gp[12][[1]], filename = ggsave("graphs/SPAplot12.png"), width = 150, height = 150, units = "mm")
130 changes: 130 additions & 0 deletions R/estimating_comparable_effects.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
library(tidyverse)
library(lubridate)
library(fst)


pre_post_covid <- c(
"pre_c" = "data/fst_files/apr19_mar20.fst",
"post_c" = "data/fst_files/apr20_mar21.fst"
) |> map(read_fst, c(
"satis",
"anxious",
"happy",
"worth",
"pwta20",
"refdte"),
as.data.table = TRUE)

pre_post_dat <- pre_post_covid |>
imap(~mutate(.x, period = .y)) |>
reduce(bind_rows) |>
mutate(across(satis:worth, ~ ifelse(.x == -9|.x == -8, NA, .x)),
Date = floor_date(dmy(refdte), "months"),
Month = factor(month(Date)),
Year = factor(year(Date)),
period = fct_inorder(period))

pre_post_dat |>
pivot_longer(satis:worth, names_to = "metric", values_to = "Score") |>
filter(!is.na(Score)) |>
ggplot(aes(period, Score)) +
stat_summary(geom = "pointrange", shape = 15,
fun.data = mean_se) +
facet_wrap(~metric, scale = "free_y")

library(fixest)

tibble(outcome = c("satis", "happy", "worth", "anxious")) |>
mutate(model = map(outcome, \(outcome) {

feols(as.formula(paste(outcome, "~ period | Month")), data = pre_post_dat, weights = ~pwta20)

})) |>
mutate(coefs = map(model, broom::tidy, conf.int = TRUE)) |>
unnest(coefs) |>
filter(term == "periodpost_c")


pre_post_dat |>
as_tibble() |>
pivot_longer(satis:worth, names_to = "outcome", values_to = "score") |>
ggplot(aes(Date, score)) +
stat_summary(geom = "line", fun = mean) +
stat_summary(fun.data = mean_cl_normal, aes(colour = Year)) +
geom_vline(xintercept = ymd("20200401")) +
facet_wrap(~outcome, scales = "free_y")

# March is start of exposure effects (anticipatory)

# longer lead-in ----------------------------------------------------------


apr11_mar21 <- c(
`2013` = "data/fst_files/apr13_mar14.fst",
`2014` = "data/fst_files/apr14_mar15.fst",
`2015` = "data/fst_files/apr15_mar16.fst",
`2016` = "data/fst_files/apr16_mar17.fst",
`2017` = "data/fst_files/apr17_mar18.fst",
`2018` = "data/fst_files/apr18_mar19.fst"
) |> map(read_fst, c(
"satis",
"anxious",
"happy",
"worth",
"refdte",
"pwta18"),
as.data.table = TRUE) |>
c(
c(
`2019` = "data/fst_files/apr19_mar20.fst",
`2020` = "data/fst_files/apr20_mar21.fst"
) |> map(read_fst, c(
"satis",
"anxious",
"happy",
"worth",
"refdte",
"pwta20"),
as.data.table = TRUE)
)




longer_data <- apr11_mar21 |>
imap(~mutate(.x, period = .y)) |>
map(rename_with, \(x) str_replace(x, "pwta\\d{2}", "weight")) |>
reduce(bind_rows) |>
mutate(across(satis:worth, ~ ifelse(.x == -9|.x == -8, NA, .x)),
Date = floor_date(dmy(refdte), "months"),
Month = factor(month(Date)),
Year = factor(year(Date)),
period = fct_inorder(period) |> as.integer(),
covid = as.integer(Date >= ymd("20200301")))

longer_data |>
pivot_longer(satis:worth, names_to = "metric", values_to = "Score") |>
filter(!is.na(Score)) |>
ggplot(aes(Year, Score)) +
stat_summary(geom = "pointrange", shape = 15,
fun.data = mean_se) +
facet_wrap(~metric, scale = "free_y")

tibble(outcome = c("satis", "happy", "worth", "anxious")) |>
mutate(model = map(outcome, \(outcome) {

feols(as.formula(paste(outcome, "~ covid + Year | Month")), data = longer_data, weights = ~weight)

})) |>
mutate(coefs = map(model, broom::tidy, conf.int = TRUE)) |>
unnest(coefs) |>
filter(term == "covid")

read_table("
UC COVID
-0.66 -0.23
-0.41 -0.23
-0.73 -0.12
+0.79 +0.43
") |>
mutate(mult = UC/COVID)
1 change: 0 additions & 1 deletion R/full_data_test_diffs.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ library(fst)
library(tidyverse)
library(data.table)
library(survey)
library(SPHSUgraphs)


satis_income_df <-
Expand Down
33 changes: 33 additions & 0 deletions R/graphing_comparative_results.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
library(tidyverse)

results <- tribble(
~ metric, ~text,
"Life Satisfaction", "-0.066 point drop in life satisfaction (-0.087 to -0.044)",
"Happiness", "-0.068 point drop in happiness (-0.094 to -0.042)",
"Anxiety", "0.065 point increase in anxiety (0.031 to 0.098)",
"Life Worthwhile", "-0.074 point drop in life being worthwhile (-0.095 to -0.053)",
"Life Satisfaction", "-0.12 point drop in life satisfaction (-0.15 to -0.09)",
"Happiness", "-0.11 point drop in happiness (-0.15 to -0.07)",
"Anxiety", "0.12 point increase in anxiety (0.07 to 0.17)",
"Life Worthwhile", "-0.12 point drop in life being worthwhile (-0.15 to -0.09)"
) |>
mutate(
estimate = str_extract(text, "-?0\\.\\d*") |> as.numeric(),
cl = str_extract(text, "(?<=\\()-?0\\.\\d*") |> as.numeric(),
cu = str_extract(text, "(?<=to )-?0\\.\\d*") |> as.numeric(),
time = c(rep("Within 1y", 4), rep(">20% threshold", 4))
)

results |>
ggplot(aes(estimate, metric, colour = time)) +
geom_point(position = position_dodge(width = 0.5)) +
geom_linerange(aes(xmin = cl, xmax = cu), position = position_dodge(width = 0.5)) +
geom_vline(xintercept = 0) +
SPHSUgraphs::theme_sphsu_light() +
SPHSUgraphs::scale_colour_sphsu()


results |>
mutate(prop_rec = if_else(time == "Within 1y", 0.041, 0.099),
scaled_eff = estimate / prop_rec) |>
select(metric, time, scaled_eff)
8 changes: 4 additions & 4 deletions R/graphing_respondents_conditions.R
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ apr14_mar21_dt <- apr14_mar21 |>
mutate(date = floor_date(dmy(refdte), "months")) |>
data.table()

apr14_mar21_dt <- apr14_mar21_dt[age > 17 & age < 66]
apr14_mar21_dt <- apr14_mar21_dt[age >= 16 & age <= 64]

apr14_mar21_dt[,uc := rowSums(.SD == 1), .SD = 2:11]
apr14_mar21_dt[,tax_cr := rowSums(.SD == 3), .SD = 2:11]
Expand Down Expand Up @@ -137,8 +137,8 @@ patch +
subtitle = "April 2014 - March 2021")


ggsave(filename = "graphs/proporrions_benefit_type_weighted.png", width = 20, height = 24,
units = "cm", dpi = 400)
# ggsave(filename = "graphs/proporrions_benefit_type_weighted.png", width = 20, height = 24,
# units = "cm", dpi = 400)


(n_claim <- apr14_mar21_dt |>
Expand Down Expand Up @@ -214,4 +214,4 @@ comb_uc_rec |>
plot_annotation(title = "Proportions of working-age adults reporting receiving UC vs observed rates",
subtitle = "April 2014 - March 2021")

ggsave(filename = "graphs/comparison_uc_reporting.png", width = 20, height = 10, dpi = 400, units = "cm")
# ggsave(filename = "graphs/comparison_uc_reporting.png", width = 20, height = 10, dpi = 400, units = "cm")
47 changes: 47 additions & 0 deletions R/power_calcs.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
library(fst)

aps_2021 <- read_fst("data/fst_files/apr20_mar21.fst")
library(tidyverse)

satis_income_df <-
c(
"data/fst_files/apr11_mar12.fst",
"data/fst_files/apr12_mar13.fst",
"data/fst_files/apr13_mar14.fst",
"data/fst_files/apr14_mar15.fst",
"data/fst_files/apr15_mar16.fst",
"data/fst_files/apr16_mar17.fst",
"data/fst_files/apr17_mar18.fst",
"data/fst_files/apr18_mar19.fst",
"data/fst_files/apr19_mar20.fst",
"data/fst_files/apr20_mar21.fst"
) |> map(read_fst, columns = c("refdte", "age", "satis"))

# df <- read_fst("data/fst_files/apr19_mar20.fst")

names(satis_income_df) <- 2011:2020

stats <- tibble(year = as.character(2011:2020), data = satis_income_df[year]) |>
unnest(data) |>
mutate(satis = na_if(satis, -9)) |>
filter(age < 20, !is.na(satis)) |>
summarise(mean = mean(satis), sd = sd(satis))

library(pwr)

pwr.t.test(
n = 385000,
d = 0.05 / stats$sd,
sig.level = 0.01
)

stats$mean


pwr.t.test(d = 0.2, power = 0.8, sig.level = 0.05)

aps_2021 |>
as_tibble() |>
select(age, satis) |>
filter(age < 20, !is.na(satis)) |>
summarise(mean = mean(satis), sd = sd(satis))
Loading

0 comments on commit 9674e51

Please sign in to comment.