From 6084ab24016afe81afa0ed43478a7e393f944177 Mon Sep 17 00:00:00 2001 From: "Mattan S. Ben-Shachar" Date: Wed, 11 Dec 2024 12:47:53 +0200 Subject: [PATCH] Update convert_r_d_OR.Rmd --- vignettes/convert_r_d_OR.Rmd | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/vignettes/convert_r_d_OR.Rmd b/vignettes/convert_r_d_OR.Rmd index 462c07424..8c1ab8d49 100644 --- a/vignettes/convert_r_d_OR.Rmd +++ b/vignettes/convert_r_d_OR.Rmd @@ -138,10 +138,11 @@ Let's give it a try: thresh <- 22500 # 2. dichotomize the outcome -hardlyworking$salary_high <- hardlyworking$salary < thresh +hardlyworking$salary_low <- factor(hardlyworking$salary < thresh, + labels = c("high", "low")) # 3. Fit a logistic regression: -fit <- glm(salary_high ~ is_senior, +fit <- glm(salary_low ~ is_senior, data = hardlyworking, family = binomial() ) @@ -152,4 +153,31 @@ parameters::model_parameters(fit) oddsratio_to_d(-1.22, log = TRUE) ``` +That's very close to Cohen's _d_ we got above ($d=-0.72$). + +We can get an even closer estimate +by accounting for the rate of low salaries in the reference group. + +```{r} +proportions( + table(is_senior = hardlyworking$is_senior, + salary_low = hardlyworking$salary_low), + margin = 1 +) + +# Or +odds_to_probs(1.55, log = TRUE) +``` + +As we can see, 82.5% of non-senior workers have a low salary. +We can plug that into `oddsratio_to_d()`: + +```{r} +oddsratio_to_d(-1.22, p0 = 0.825, log = TRUE) +``` + +We have successfully recovered the standardized mean difference +between seniors' and non-seniors' salaries +by only observing a dichotomized salary ("low/high salary"). + # References