-
Notifications
You must be signed in to change notification settings - Fork 0
/
Main page.Rmd
252 lines (178 loc) · 7.93 KB
/
Main page.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
---
title: "Religion in South Korea"
author: "Grace Kim"
date: "11/13/2019"
output: html_document
---
This was a practice document for the initial creation of the various graphics in this project. It contains many of the bar graphs for gender and age, as well as the world vs. South Korea bar graph.
```{r setup, include=FALSE}
# Default for every chunk: hide the R source in the knitted HTML (echo = FALSE).
knitr::opts_chunk$set(echo = FALSE)
# Packages attached for the chunks in this document.
library(tidyverse)  # dplyr / tidyr / readr / ggplot2 verbs used throughout
library(readxl)     # read_xlsx() / read_excel() for the Excel inputs
library(janitor)    # clean_names() to normalise column names
library(hrbrthemes) # NOTE(review): not referenced by name in the chunks of this file
library(openxlsx)   # NOTE(review): not referenced by name in the chunks of this file
library(stringr)    # str_c(); also attached by library(tidyverse)
```
```{r}
# ---- 2015 South Korea census data on religion ----
# Column names are normalised by janitor::clean_names().
x <- clean_names(
  read_xlsx("Korea/raw-data/2015_Religions_Korea.xlsx")
)

# Works around the merged-column problem from Excel: carry the last seen
# gender and region values down into the rows below them.
# NOTE(review): the later chunks in this file filter `x`, not `test` --
# confirm which of the two they are meant to use.
test <- fill(x, gender, korean_region)

# ---- World religion data, one row per country ----
# The original author describes the source as the "world peabody research
# center" (presumably Pew Research Center -- confirm). Being a CSV, every
# column type is declared explicitly instead of being guessed.
world <- clean_names(
  read_csv(
    "Korea/raw-data/World_Religion_Data.csv",
    col_types = cols(
      Country = col_character(),
      Christian = col_double(),
      Muslim = col_double(),
      Unaffiliated = col_double(),
      Hindu = col_double(),
      Buddhist = col_double(),
      Folk = col_double(),
      Other = col_double(),
      Jewish = col_double()
    )
  )
)
```
```{r Gender circular bar plot}
# ---- Religion by gender: horizontal bar chart and circular (polar) bar plot ----

# Keep the nationwide rows, drop the combined-gender row, and keep only the
# all-ages total.
gender_data <- x %>%
  filter(
    korean_region == "All of Korea",
    gender != "Korea - Total",
    age == "Total"
  )

# Converted to a plain data frame before reshaping (the original workflow
# notes this as a requirement for gather()).
gender_data <- data.frame(gender_data)

# With no columns named, gather() stacks every column into key/value pairs,
# one source column after another.
gender_data_vertical <- gather(gender_data)

# Keep only the stacked rows needed for the plot.
# NOTE(review): rows 11:30 are chosen by position, and the labels below assume
# those rows alternate Male, Female -- confirm against the spreadsheet layout.
gender_data_vertical <- gender_data_vertical[11:30, ]

# Label each row by parity (odd = Male, even = Female), build a per-gender
# religion label by suffixing the key with "M"/"F", convert the stacked values
# to numbers, and sort from largest to smallest.
gender_data_vertical <- gender_data_vertical %>%
  mutate(
    gender = ifelse(row_number() %% 2 == 0, "Female", "Male"),
    religion = str_c(key, ifelse(row_number() %% 2 == 0, "F", "M")),
    value = as.numeric(value)
  ) %>%
  arrange(desc(value))

# Horizontal bars, one per religion/gender label, ordered by value.
# NOTE(review): the original note describes a normalized (percentage) chart,
# but the bars are drawn from the raw `value` column with default stacking --
# confirm the intended scale and the y-axis label.
ggplot(
  gender_data_vertical,
  aes(x = reorder(as.factor(religion), value), y = value, fill = gender)
) +
  geom_bar(stat = "identity") +
  coord_flip() +
  scale_fill_manual(values = c("#FF5733", "#15C9C1")) +
  labs(
    title = "Male and Female Religious Data",
    x = "Religions",
    y = "Percentage Practiced by Population"
  )

# Same bars on a log10 value scale, wrapped around a circle.
gender <- ggplot(
  gender_data_vertical,
  aes(x = reorder(as.factor(religion), value), y = value, fill = gender)
) +
  # One bar per religion/gender label, coloured by gender.
  geom_bar(stat = "identity") +
  scale_y_log10() +
  theme_minimal() +
  # Strip axis titles and grid lines; negative margins shrink the white space
  # around the circle.
  theme(
    axis.title = element_blank(),
    panel.grid = element_blank(),
    plot.margin = unit(rep(-1, 4), "cm")
  ) +
  # Polar instead of cartesian coordinates.
  coord_polar(start = 0)

gender
```
```{r Age line graphs}
# ---- Religious population by age group, with a quadratic trend ----

# Nationwide, both genders combined, one row per age group (the "Total" age
# row is dropped).
# age_number is the first run of digits in the age label, so labels such as
# "5-9" or "100+" parse correctly; taking only the first two characters would
# give NA for the former and 10 for the latter.
age_data <- x %>%
  filter(
    korean_region == "All of Korea",
    age != "Total",
    gender == "Korea - Total"
  ) %>%
  select(age, religious_total) %>%
  mutate(
    age_number = as.numeric(str_extract(age, "[0-9]+")),
    age2 = age_number^2
  )

# Fitted only so the coefficients can be inspected in the output; the plot
# below fits its own smoother with the same quadratic form.
quadratic.model <- lm(religious_total ~ age_number + age2, data = age_data)
summary(quadratic.model)

# Bars per age group with a quadratic fit drawn on top, to show how the
# religious population changes across generations.
# No explicit line size is passed: 1 is already the smoother's default width,
# and the `size` argument for lines is deprecated in recent ggplot2 releases.
ggplot(
  age_data,
  aes(x = age_number, y = religious_total, fill = age_number)
) +
  geom_bar(stat = "identity") +
  stat_smooth(method = "lm", formula = y ~ x + I(x^2))
```
```{r World Religions}
# ---- World average vs. South Korea, per religion ----

# Mean of every religion column across all countries in the world dataset
# (one row, one column per religion).
world_summary <- world %>%
  summarize(
    across(
      c(christian, muslim, unaffiliated, hindu, buddhist, folk, other, jewish),
      function(share) mean(as.numeric(share))
    )
  )

# South Korea's own row from the same dataset, without the country column.
korea_totals <- world %>%
  filter(country == "South Korea") %>%
  select(-matches("country"))

# World summary row first, then the South Korea row(s).
world_korea_summary <- rbind(
  data.frame(world_summary),
  data.frame(korea_totals)
)

# Long format for the grouped bar plot: one row per (country, religion).
# The country label is attached to each source row *before* reshaping rather
# than inferred from row parity afterwards, so the labels stay correct even if
# the South Korea filter does not return exactly one row.
vertical_world <- world_korea_summary %>%
  mutate(country = c("World", rep("South Korea", nrow(korea_totals)))) %>%
  pivot_longer(cols = -country, names_to = "key", values_to = "value") %>%
  select(key, value, country)

# Grouped bars comparing South Korea with the world summary for each religion.
ggplot(vertical_world, aes(x = key, y = value, fill = country)) +
  geom_bar(position = "dodge", stat = "identity") +
  labs(
    x = "Religion",
    y = "Percentages of Religious Followers",
    fill = "",
    title = "The Religions of the World vs. South Korea"
  )
```
**Don't look down here:** the chunks below are labelled OLD. (Leftover scratch snippet: `summarize(christianity = mean(as.numeric(christian))) %>%`.)
```{r OLD data cleaning}
# Migration workbook (per its file name: long-term international migration by
# sex and age), read with column names normalised by clean_names().
korean_migration_data <- clean_names(
  read_excel(
    "Korea/raw-data/LongTerm_International_Migration_by_Sex_and_Age_Nationals__Foreigners__20191005095057.xlsx"
  )
)
```
```{r OLD Bar Graph}
# ---- Incoming vs. leaving migrants by age group and gender ----

# Drop the "Total" and "unknown" rows (they duplicate the combined data), then
# total incoming and outgoing migrants per age group and gender.
# `freq` marks how many copies of each row are wanted in the next step.
expanded_korean <- korean_migration_data %>%
  filter(
    by_gender != "Total",
    by_age != "unknown",
    by_age != "Total"
  ) %>%
  group_by(by_age, by_gender) %>%
  summarise(
    incoming_migrants_total = sum(incoming_migrants),
    outcoming_migrants_total = sum(outcoming_migrants)
  ) %>%
  mutate(freq = 2)

# Show the summarised table in the knitted output.
expanded_korean

# Repeat every row `freq` times (keeping the first four columns) so that one
# copy can carry the incoming count and the other the leaving count in a
# single column.
expanded_korean <- expanded_korean[
  rep(row.names(expanded_korean), expanded_korean$freq),
  1:4
]

# Odd rows become "Leaving", even rows "Incoming"; migrant_num then picks the
# matching total for each row.
barplot_data <- expanded_korean %>%
  mutate(
    migrant_type = ifelse(row_number() %% 2 == 0, "Incoming", "Leaving"),
    migrant_num = ifelse(
      migrant_type == "Leaving",
      outcoming_migrants_total,
      incoming_migrants_total
    )
  )

# Grouped bars by age and migrant type, one facet per gender; comma labels on
# the y axis avoid scientific notation.
ggplot(
  barplot_data,
  aes(x = by_age, y = migrant_num, fill = migrant_type)
) +
  geom_bar(position = "dodge", stat = "identity") +
  facet_grid(~ by_gender) +
  labs(
    x = "",
    y = "#'s of incoming/leaving migrants",
    fill = "Migrant Type",
    title = "Migrants of South Korea"
  ) +
  scale_y_continuous(labels = scales::comma)
```