From 6084ab24016afe81afa0ed43478a7e393f944177 Mon Sep 17 00:00:00 2001
From: "Mattan S. Ben-Shachar" <mattansb@msbstats.info>
Date: Wed, 11 Dec 2024 12:47:53 +0200
Subject: [PATCH] Update convert_r_d_OR.Rmd

---
 vignettes/convert_r_d_OR.Rmd | 32 ++++++++++++++++++++++++++++++--
 1 file changed, 30 insertions(+), 2 deletions(-)

diff --git a/vignettes/convert_r_d_OR.Rmd b/vignettes/convert_r_d_OR.Rmd
index 462c07424..8c1ab8d49 100644
--- a/vignettes/convert_r_d_OR.Rmd
+++ b/vignettes/convert_r_d_OR.Rmd
@@ -138,10 +138,11 @@ Let's give it a try:
 thresh <- 22500
 
 # 2. dichotomize the outcome
-hardlyworking$salary_high <- hardlyworking$salary < thresh
+hardlyworking$salary_low <- factor(hardlyworking$salary < thresh, 
+                                   labels = c("high", "low"))
 
 # 3. Fit a logistic regression:
-fit <- glm(salary_high ~ is_senior,
+fit <- glm(salary_low ~ is_senior,
   data = hardlyworking,
   family = binomial()
 )
@@ -152,4 +153,31 @@ parameters::model_parameters(fit)
 oddsratio_to_d(-1.22, log = TRUE)
 ```
 
+That's very close to the Cohen's _d_ we got above ($d=-0.72$).
+
+We can get an even closer estimate 
+by accounting for the rate of low salaries in the reference group.
+
+```{r}
+proportions(
+  table(is_senior = hardlyworking$is_senior, 
+        salary_low = hardlyworking$salary_low), 
+  margin = 1
+)
+
+# Or
+odds_to_probs(1.55, log = TRUE)
+```
+
+As we can see, 82.5% of non-senior workers have a low salary. 
+We can plug that into `oddsratio_to_d()`:
+
+```{r}
+oddsratio_to_d(-1.22, p0 = 0.825, log = TRUE)
+```
+
+We have successfully recovered the standardized mean difference
+between seniors' and non-seniors' salaries
+by only observing a dichotomized salary ("low/high salary").
+
 # References