Skip to content

Commit

Permalink
integrate all new changes
Browse files Browse the repository at this point in the history
  • Loading branch information
rempsyc committed Jul 2, 2024
1 parent 6733f34 commit 9b498ab
Show file tree
Hide file tree
Showing 11 changed files with 601 additions and 376 deletions.
145 changes: 145 additions & 0 deletions papers/preprint/fig1.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@

```{r fig1_prep}
# Global plot styling -------------------------------------------------------
# Session-wide theme: the classic theme with the axis lines removed and a
# black border drawn around the panel instead, the legend placed below the
# plot, and the title aligned to the whole plot area.
theme_set(
  theme_classic() +
    theme(
      plot.title.position = "plot",
      legend.position = "bottom",
      panel.border = element_rect(
        fill = NA,
        colour = "black",
        linewidth = rel(2)
      ),
      axis.line = element_blank()
    )
)

# Override the default aesthetics of the point geom for the layers below.
update_geom_defaults(
  "point",
  aes(size = 3, colour = "black", fill = "grey80", stroke = 1)
)
# Data --------------------------------------------------------------------
# Example data: `women` from the datasets package (columns `height`, `weight`)
data("women", package = "datasets")

# Convert units to metric (height x 2.54 -> cm, weight / 2.205 -> kg)
women$height <- women$height * 2.54
women$weight <- women$weight / 2.205

# Make some more data (seed fixed so the jittered copy is reproducible)
set.seed(42)
women <- rbind(
  women,
  # duplicate with jitter: iterate over the data-frame columns directly, in
  # column order, with a declared result type -- instead of letting apply()
  # coerce the data frame to a matrix first
  vapply(women, jitter, FUN.VALUE = numeric(nrow(women)), factor = 5),
  # short and overweight
  c(155, 73),
  # just tall
  c(230, 103)
)

# Descriptives
# Per-column c(min, max); used later to fix the axis limits of every panel
ranges <- lapply(women, range)
# Mean vector and covariance matrix of the two measurement columns
M <- vapply(women[, 1:2], mean, FUN.VALUE = numeric(1))
V <- cov(women[, 1:2])
# Standard deviations (square root of the covariance diagonal)
s <- sqrt(diag(V))
# Plot constants ------------------------------------------------------------
# Layers shared by all panels, bundled in a list so that each plot can add
# them in one step (`+ L`).
L <- list(
  # untitled shape legend; one shape code per outlier status
  scale_shape_manual(
    name = NULL,
    values = c(20, 24),
    labels = c("Not an Outlier", "Outlier")
  ),
  # fix both axes to the observed data ranges
  coord_cartesian(
    xlim = ranges[["height"]],
    ylim = ranges[["weight"]]
  ),
  labs(x = "Height [cm]", y = "Weight [kg]")
)
# Univariate methods ------------------------------------------------------
# Flag outliers with the z-score method. Only the two measurement columns are
# passed (as in the multivariate section), so re-running this section after
# other columns have been added to `women` cannot leak them into the check.
women[["univ_outlier"]] <- check_outliers(women[, 1:2], method = "zscore")

# Positions of the reference lines, in SD units around the mean.
# NOTE(review): +/- 3.290527 equals qnorm(1 - 0.001 / 2); presumably this
# mirrors the threshold used by the z-score check above -- confirm.
Z <- c(-3.290527, -2, -1, 0, 1, 2, 3.290527)
uni_thresholds <- data.frame(
  X = M["height"] + s["height"] * Z,
  Y = M["weight"] + s["weight"] * Z,
  Z_lab = Z |>
    format_value(
      digits = "signif3",
      style_positive = "plus",
      style_negative = "minus"
    ) |>
    paste0(" SD") |>
    # label the centre line by value rather than by a hard-coded position,
    # so the labels stay correct if `Z` is edited
    replace(Z == 0, "Mean")
)

# Panel A: scatterplot with labelled horizontal lines at the weight thresholds
p_uni <- ggplot(women, aes(height, weight)) +
  geom_texthline(
    aes(yintercept = Y, label = Z_lab),
    data = uni_thresholds,
    hjust = 0.8, halign = "center",
    color = "grey60", size = 3
  ) +
  geom_point(aes(shape = univ_outlier)) +
  L +
  labs(title = "(A) Univariate")
# Multivariate methods --------------------------------------------
# Flag outliers on the two measurement columns jointly
women[["multiv_outlier"]] <- check_outliers(women[, 1:2], method = "mahalanobis")

# Chi-square critical value (df = 2, upper-tail p = .001); its square root is
# used below as the radius of the outermost ellipse
multi_thresholds <- qchisq(p = 0.001, df = 2, lower.tail = FALSE)
Ds <- c(1, 2, sqrt(multi_thresholds))

# One data frame of ellipse outline coordinates per radius in `Ds`, centred on
# the mean vector `M` and shaped by the covariance matrix `V`.
# NOTE(review): `ellipse()` is presumably car::ellipse() -- confirm against
# the packages loaded by the parent document.
ellipses <- lapply(setNames(nm = Ds), function(.D) {
  coords <- ellipse(
    center = M,
    shape = V,
    radius = .D,
    segments = 100,
    draw = FALSE
  )
  coords <- as.data.frame(coords)
  coords[["D"]] <- .D
  coords
})
ellipses <- do.call("rbind", ellipses)
# Name the coordinate columns like the data so the plot's x/y mapping applies
colnames(ellipses)[1:2] <- colnames(women)[1:2]
ellipses[["D_lab"]] <- format_value(ellipses[["D"]], digits = "signif3")

# Panel B: scatterplot with the centroid and labelled distance ellipses
p_multi <-
  ggplot(women, aes(height, weight)) +
  # Centroid, drawn exactly once: mapping the constants inside aes() on a
  # layer that inherits `women` would draw one overplotted point per row.
  annotate(
    "point",
    x = M[["height"]], y = M[["weight"]],
    color = "grey60", size = 4
  ) +
  geom_textcontour(
    aes(group = D_lab, label = D_lab),
    data = ellipses,
    stat = "identity", hjust = 0.17,
    color = "grey60", size = 3
  ) +
  geom_point(aes(shape = multiv_outlier)) +
  L +
  labs(title = "(B) Multivariate")
# Model-specific methods ------------------------------------------------
# Simple linear regression of weight on height; the regression line, the
# residual segments and the outlier flags below are all derived from it.
model <- lm(weight ~ height, data = women)

# Coefficients in model order: intercept first, then the slope for height
a <- coef(model)[1]
b <- coef(model)[2]

# Outlier flags (method "cook") and fitted values, stored next to the data
women[["model_outlier"]] <- check_outliers(model, method = "cook")
women[["y_hat"]] <- fitted(model)

# Panel C: regression line, a vertical segment from each observation to its
# fitted value, and the observations themselves
p_model <-
  ggplot(data = women, mapping = aes(x = height, y = weight)) +
  geom_abline(slope = b, intercept = a, colour = "grey60") +
  geom_segment(
    mapping = aes(xend = height, yend = y_hat),
    colour = "grey60"
  ) +
  geom_point(mapping = aes(shape = model_outlier)) +
  L +
  labs(title = "(C) Model")
# Combine plots ---------------------------------------------------
# Theme tweak that blanks the y-axis title and tick labels; added to the
# second and third panels only.
no_y <- theme(
  axis.title.y = element_blank(),
  axis.text.y = element_blank()
)

# Three panels in a single row, with the legends collected into one
fig1 <-
  p_uni +
  (p_multi + no_y) +
  (p_model + no_y) +
  plot_layout(guides = "collect", nrow = 1)
```
165 changes: 73 additions & 92 deletions papers/preprint/paper.Rmd

Large diffs are not rendered by default.

114 changes: 106 additions & 8 deletions papers/preprint/paper.bib
Original file line number Diff line number Diff line change
@@ -1,5 +1,107 @@
@article{zijlstra2011outliers,
title={Outliers in questionnaire data: Can they be detected and should they be removed?},
author={Zijlstra, Wobbe P and van der Ark, L Andries and Sijtsma, Klaas},
journal={Journal of Educational and Behavioral Statistics},
volume={36},
number={2},
pages={186--212},
year={2011},
publisher={Sage Publications},
address={Los Angeles, CA},
doi = {10.3102/1076998610366263}
}

@software{carelesspackage,
title = {{careless}: Procedures for computing indices of careless responding},
author = {Yentes, R.D. and Wilhelm, F.},
year = {2023},
url = {https://cran.r-project.org/package=careless}
}

@article{curran2016methods,
title={Methods for the detection of carelessly invalid responses in survey data},
author={Curran, Paul G},
journal={Journal of Experimental Social Psychology},
volume={66},
pages={4--19},
year={2016},
publisher={Elsevier},
doi = {10.1016/j.jesp.2015.07.006}
}

@article{cao2018z,
title={Z-Glyph: Visualizing outliers in multivariate data},
author={Cao, Nan and Lin, Yu-Ru and Gotz, David and Du, Fan},
journal={Information Visualization},
volume={17},
number={1},
pages={22--40},
year={2018},
publisher={SAGE Publications},
address={London, England},
doi = {10.1177/1473871616686635}
}

@article{ward2023dealing,
title={Dealing with careless responding in survey data: Prevention, identification, and recommended best practices},
author={Ward, Mary K and Meade, Adam W},
journal={Annual Review of Psychology},
volume={74},
number={1},
pages={577--596},
year={2023},
publisher={Annual Reviews},
doi = {10.1146/annurev-psych-040422-045007}
}

@article{goldammer2020careless,
title={Careless responding in questionnaire measures: Detection, impact, and remedies},
author={Goldammer, Philippe and Annen, Hubert and St{\"o}ckli, Peter Lucas and Jonas, Klaus},
journal={The Leadership Quarterly},
volume={31},
number={4},
pages={101384},
year={2020},
publisher={Elsevier},
doi = {10.1016/j.leaqua.2020.101384}
}

@article{aruguete2019serious,
title={How serious is the ‘carelessness’ problem on Mechanical Turk?},
author={Aruguete, Mara S and Huynh, Ho and Browne, Blaine L and Jurs, Bethany and Flint, Emilia and McCutcheon, Lynn E},
journal={International Journal of Social Research Methodology},
volume={22},
number={5},
pages={441--449},
year={2019},
publisher={Taylor \& Francis},
doi = {10.1080/13645579.2018.1563966}
}

@article{chaloner1988bayesian,
title={A Bayesian approach to outlier detection and residual analysis},
author={Chaloner, Kathryn and Brant, Rollin},
journal={Biometrika},
volume={75},
number={4},
pages={651--659},
year={1988},
publisher={Oxford University Press},
doi = {10.1093/biomet/75.4.651}
}

@article{ciccione2023outlier,
title={Outlier detection and rejection in scatterplots: Do outliers influence intuitive statistical judgments?},
author={Ciccione, Lorenzo and Dehaene, Guillaume and Dehaene, Stanislas},
journal={Journal of Experimental Psychology: Human Perception and Performance},
volume={49},
number={1},
pages={129},
year={2023},
publisher={American Psychological Association},
doi = {10.1037/xhp0001065}
}

@article{mcneil1977interactive,
title={Interactive data analysis: a practical primer},
title={Interactive data analysis: A practical primer},
author={McNeil, Donald R},
journal={(No Title)},
year={1977},
Expand Down Expand Up @@ -108,8 +210,7 @@ @article{leys2013outliers
number = {4},
pages = {764-766},
year = {2013},
doi = {10.1016/j.jesp.2013.03.013},
url = {https://doi.org/10.1016/j.jesp.2013.03.013}
doi = {10.1016/j.jesp.2013.03.013}
}

@article{leys2018outliers,
Expand All @@ -120,7 +221,6 @@ @article{leys2018outliers
year = {2018},
issn = {0022-1031},
doi = {10.1016/j.jesp.2017.09.011},
url = {https://www.sciencedirect.com/science/article/pii/S0022103117302123},
author = {Christophe Leys and Olivier Klein and Yves Dominicy and Christophe Ley},
}

Expand All @@ -132,8 +232,7 @@ @article{simmons2011false
number = {11},
pages = {1359-1366},
year = {2011},
doi = {10.1177/0956797611417632},
URL = {https://doi.org/10.1177/0956797611417632},
doi = {10.1177/0956797611417632}
}

@software{easystatspackage,
Expand All @@ -153,8 +252,7 @@ @Article{ludecke2021performance
journal = {Journal of Open Source Software},
year = {2021},
pages = {3139},
doi = {10.21105/joss.03139},
url = {https://doi.org/10.21105/joss.03139}
doi = {10.21105/joss.03139}
}

@Article{patil2022datawizard,
Expand Down
Binary file modified papers/preprint/paper.pdf
Binary file not shown.
Loading

0 comments on commit 9b498ab

Please sign in to comment.