-
Notifications
You must be signed in to change notification settings - Fork 0
/
sudan_health_nutrition_3.R
166 lines (114 loc) · 6.23 KB
/
sudan_health_nutrition_3.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
# GENERAL CHARACTERISTICS
# Summary table restricted to the three education-access indicators.
tbl_summary(
  data = child,
  include = c(accessEducation, accessBasicEducation, everAttendedSchool)
)
# INDIVIDUAL FACTORS
# Age and sex, stratified by each education-access outcome.
tbl_summary(child, include = c(age, sex), by = accessEducation)
tbl_summary(child, include = c(age, sex), by = accessBasicEducation)
# NOTE(review): a third stratification (e.g. by everAttendedSchool) may have
# been intended here as well - confirm against the analysis plan.
# HEALTH-RELATED FACTORS
# Vaccine record and diarrhoea, split by access to education.
tbl_summary(
  data = child,
  include = c(vaccineRecord, diarrhoea),
  by = accessEducation
)
# STRUCTURAL FACTORS
# Insurance, WASH, school distance and displacement, split the same way.
tbl_summary(
  data = child,
  include = c(healthInsurance, noWASH, schoolFar, displacement),
  by = accessEducation
)
# Barriers to basic pre-school education ---------------------------------------
# Missing values per variable --------------------------------------------------
# Count the NA values in every column of `child`. vapply() walks the columns
# directly, so a zero-column data frame yields an empty result instead of
# indexing columns 1 and 0, and the result is always an integer vector.
na_list <- vapply(
  child,
  function(column) sum(is.na(column)),
  integer(1),
  USE.NAMES = FALSE
)
colname_list <- colnames(child)

# One row per variable: NA count, NA share of all rows (percent, rounded to
# two decimals) and the number of non-missing values.
# check.names = FALSE keeps the "missing %" column name exactly as written.
na_tbl <- data.frame(
  variable = colname_list,
  missing = na_list,
  `missing %` = round(na_list / nrow(child) * 100, 2),
  present = nrow(child) - na_list,
  check.names = FALSE
)
# Descriptive analysis ---------------------------------------------------------
# Health-related factors
# Recode everAttendedSchool as a factor labelled "No"/"Yes" and add
# `safe_water`, a "No"/"Yes" factor built from vaccineRecord.
# NOTE(review): `safe_water` is derived from vaccineRecord rather than from a
# water variable - looks like a copy-paste slip; confirm the intended source.
child <- mutate(
  child,
  everAttendedSchool = factor(everAttendedSchool, labels = c("No", "Yes")),
  safe_water = factor(vaccineRecord, labels = c("No", "Yes"))
)

# Cross-tabulate school attendance against vaccine record, using only the rows
# where both values are present.
tabyl(
  drop_na(child, everAttendedSchool, vaccineRecord),
  everAttendedSchool,
  vaccineRecord
)

# Disabled draft, kept for reference:
# child %>%
# cross_tbl(child, by = "everAttendedSchool")
# Descriptive 2 ----------------------------------------------------------------
# Working copy of `child` for the analyses in this section.
child_health <- child

# Keep only the rows that have a recorded age.
# NOTE(review): child_health_clean is not referenced again in the visible part
# of this script - confirm it is still needed.
child_health_clean <- child_health[!is.na(child_health[["age"]]), ]

# GENERAL CHARACTERISTICS
# Summary of every column, then of the education-access indicators only.
tbl_summary(data = child_health)
tbl_summary(
  data = child_health,
  include = c(accessEducation, accessBasicEducation, everAttendedSchool)
)
# INDIVIDUAL FACTORS
# For each outcome: a cross-tabulation with sex (missing values shown when
# present), followed by a chi-square test of association that leaves missing
# values out of the table.

# Sex x access to basic education
table(
  child_health$sex,
  child_health$accessBasicEducation,
  useNA = "ifany"
)
chisq.test(
  table(child_health$sex, child_health$accessBasicEducation, useNA = "no")
)

# Sex x access to education
table(
  child_health$sex,
  child_health$accessEducation,
  useNA = "ifany"
)
chisq.test(
  table(child_health$sex, child_health$accessEducation, useNA = "no")
)

# Sex x access to pre-school
table(
  child_health$sex,
  child_health$accessPreSchool,
  useNA = "ifany"
)
chisq.test(
  table(child_health$sex, child_health$accessPreSchool, useNA = "no")
)
# HEALTHCARE FACTORS
# Each health indicator is cross-tabulated with access to basic education
# (missing values shown when present) and then tested for association with a
# chi-square test on the table built without missing values.

# Vaccine record x access to basic education
table(
  child_health$vaccineRecord,
  child_health$accessBasicEducation,
  useNA = "ifany"
)
chisq.test(
  table(
    child_health$vaccineRecord,
    child_health$accessBasicEducation,
    useNA = "no"
  )
)

# Diarrhoea x access to basic education
table(
  child_health$diarrhoea,
  child_health$accessBasicEducation,
  useNA = "ifany"
)
chisq.test(
  table(
    child_health$diarrhoea,
    child_health$accessBasicEducation,
    useNA = "no"
  )
)

# Fever x access to basic education
table(
  child_health$fever,
  child_health$accessBasicEducation,
  useNA = "ifany"
)
chisq.test(
  table(
    child_health$fever,
    child_health$accessBasicEducation,
    useNA = "no"
  )
)
# SOCIOCULTURAL FACTORS
# Same pattern as for the other factor groups: a cross-tabulation that shows
# missing values when present, then a chi-square test on the table built
# without them.

# Early marriage x access to basic education
table(
  child_health$earlyMarriage,
  child_health$accessBasicEducation,
  useNA = "ifany"
)
chisq.test(
  table(
    child_health$earlyMarriage,
    child_health$accessBasicEducation,
    useNA = "no"
  )
)

# Displacement x access to basic education
table(
  child_health$displacement,
  child_health$accessBasicEducation,
  useNA = "ifany"
)
chisq.test(
  table(
    child_health$displacement,
    child_health$accessBasicEducation,
    useNA = "no"
  )
)
# MULTIPLE LOGISTIC REGRESSION
# Turn the categorical predictors into factors. ifelse() returns NA wherever
# its test is NA, so missing values stay missing after the "Yes"/"No" recode.
child_health$sex <- as.factor(child_health$sex)
child_health$vaccineRecord <- as.factor(
  ifelse(child_health$vaccineRecord == 1, "Yes", "No")
)
child_health$earlyMarriage <- as.factor(
  ifelse(child_health$earlyMarriage == 1, "Yes", "No")
)

# Binomial GLM of access to basic education on age, sex and vaccine record.
# NOTE(review): earlyMarriage is recoded above but is not a term in this
# formula - confirm whether it should be part of the model.
model <- glm(
  accessBasicEducation ~ age + sex + vaccineRecord,
  data = child_health,
  family = binomial()
)
summary(model)

# Deviance residuals, plotted against their position in the residual vector.
# The variable shares its name with the residuals() function; the call on the
# right-hand side still resolves to the function.
residuals <- residuals(model, type = "deviance")
plot(
  residuals,
  type = "b",
  main = "Deviance Residuals",
  xlab = "Observation",
  ylab = "Deviance Residual"
)
# DISTRIBUTION OF KEY VARIABLES
# Summary statistics for age
summary(child_health$age)

# Frequency counts for sex, diarrhoea and ARI
table(child_health$sex)
table(child_health$diarrhoea)
table(child_health$ari)

# Relationships between demographic factors and health outcomes
# Mean age within each diarrhoea group
aggregate(
  age ~ diarrhoea,
  data = child_health,
  FUN = mean,
  na.rm = TRUE
)

# Sex by diarrhoea
xtabs(~ sex + diarrhoea, data = child_health)

# Sex by ARI
xtabs(~ sex + ari, data = child_health)
# Visual exploration
# Histogram of age using bins of width 1.
ggplot(child_health, aes(x = age)) +
  geom_histogram(binwidth = 1, fill = "blue", color = "black") +
  labs(title = "Age Distribution", x = "Age", y = "Count")
# Diarrhoea cases by access to education
# Bars show, within each access-to-education group, the proportion of children
# in each diarrhoea category. The fill is mapped to `diarrhoea` so the bars
# agree with the plot title and the legend.
# NOTE(review): assumes diarrhoea uses the 0/1 coding that the colour mapping
# below expects - confirm.
ggplot(child_health, aes(x = accessEducation, fill = factor(diarrhoea))) +
  geom_bar(position = "fill") +
  scale_fill_manual(
    values = c("0" = "grey", "1" = "red"),
    labels = c("No", "Yes")
  ) +
  labs(
    title = "Diarrhoea Cases by Access to Education",
    x = "Access to Education",
    y = "Proportion",
    fill = "Diarrhoea"
  )
# ARI cases by sex
# Bars show, within each sex, the proportion of children in each ARI category.
ggplot(child_health, aes(x = sex, fill = factor(ari))) +
  geom_bar(position = "fill") +
  scale_fill_manual(
    values = c("0" = "grey", "1" = "green"),
    labels = c("No", "Yes")
  ) +
  labs(
    title = "ARI Cases by Sex",
    x = "Sex",
    y = "Proportion",
    fill = "ARI"
  )