-
Notifications
You must be signed in to change notification settings - Fork 0
/
line plot.Rmd
136 lines (109 loc) · 5.2 KB
/
line plot.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
---
title: "Line plots"
author: "Grace Kim"
date: "12/8/2019"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
library(readxl)
library(janitor)
library(scales)
```
```{r}
############# Line plot data ##############

# Korean religion census tables for 1985, 1995, 2005 and 2015.
# Each workbook goes through the same three steps in a single pipeline:
#   1. read_xlsx()   - load the sheet
#   2. clean_names() - normalise the column headers
#   3. fill()        - merged cells in the Excel sheets leave gaps in `gender`
#                      and `korean_region`; carry the previous value forward
#                      so every row is labelled

x1985 <- read_xlsx("Korea/raw-data/1985_Religions_Korea.xlsx") %>%
  clean_names() %>%
  fill(gender, korean_region)

x1995 <- read_xlsx("Korea/raw-data/1995_Religions_Korea.xlsx") %>%
  clean_names() %>%
  fill(gender, korean_region)

x2005 <- read_xlsx("Korea/raw-data/2005_Religions_Korea.xlsx") %>%
  clean_names() %>%
  fill(gender, korean_region)

x2015 <- read_xlsx("Korea/raw-data/2015_Religions_Korea.xlsx") %>%
  clean_names() %>%
  fill(gender, korean_region)
# Pull the single nationwide summary row out of one census table.
#
# Keeps only the row where korean_region is "All of Korea", gender equals
# `gender_total`, and age is "Total", then keeps the population columns shown
# on the line plot (overall total, religious total, Protestant, Catholic,
# Buddhism, no religion) and tags the row with the census year.
#
# Arguments:
#   census        a cleaned census table (x1985, x1995, x2005 or x2015)
#   census_year   the year label stored in the `year` column (kept as a string
#                 so the plot treats it as a discrete axis)
#   gender_total  the label that marks the all-genders row in `gender`
#
# Returns a table with the six population columns plus `year`. A warning is
# raised when the filters do not match exactly one row, because a label
# mismatch would otherwise silently drop that year from the plot.
extract_national_totals <- function(census,
                                    census_year,
                                    gender_total = "Korea - Total") {
  national <- census %>%
    filter(
      korean_region == "All of Korea",
      # .env$ makes sure the function argument is used even if the sheet
      # happens to contain a column with the same name
      gender == .env$gender_total,
      age == "Total"
    ) %>%
    select(
      total,
      religious_total,
      christianity_protestant,
      christianity_catholic,
      buddhism,
      no_religion
    ) %>%
    mutate(year = .env$census_year)

  if (nrow(national) != 1L) {
    warning(
      "Expected exactly one national total row for ", census_year,
      " but found ", nrow(national), ".",
      call. = FALSE
    )
  }

  national
}

totals2015 <- extract_national_totals(x2015, "2015")
totals2005 <- extract_national_totals(x2005, "2005")
totals1995 <- extract_national_totals(x1995, "1995")

# The 1985 table is filtered on "Total" rather than "Korea - Total".
# NOTE(review): presumably the 1985 workbook labels the all-genders row
# differently - confirm against the raw file.
totals1985 <- extract_national_totals(x1985, "1985", gender_total = "Total")
# Stack the four census years into one wide table: one row per year, one
# column per population measure, plus `year`.
combined2 <- rbind(totals1985, totals1995, totals2005, totals2015)

# Display label for each population column; these become the legend entries
# of the line plot.
religion_labels <- c(
  total = "Total Population",
  religious_total = "All Religions",
  christianity_protestant = "Christianity Protestant",
  christianity_catholic = "Christianity Catholic",
  buddhism = "Buddhism",
  no_religion = "No Religion"
)

# Reshape to long format: one row per (year, religion) pair with the count in
# `population`. pivot_longer() replaces duplicating every row six times and
# picking values by row position, so the result no longer depends on row order
# and the value column keeps whatever type the source columns have.
# Only `year`, `religion` and `population` are kept; the wide columns are not
# repeated on every row.
combined <- combined2 %>%
  pivot_longer(
    cols = -year,
    names_to = "religion",
    values_to = "population"
  ) %>%
  # Translate column names into their display labels
  mutate(religion = unname(religion_labels[religion])) %>%
  as.data.frame()
# Line plot of population by census year, one coloured line per religion
# category. `population` is converted with as.numeric() because it may arrive
# as text from the reshaping step.
combined %>%
  ggplot(aes(
    x = year,
    y = as.numeric(population),
    group = religion,
    color = religion
  )) +
  # The line width is a fixed value, so it is set on the geom instead of being
  # mapped inside aes(); mapping a constant there adds a meaningless "size"
  # legend and lets the size scale pick the actual width.
  # (linewidth needs ggplot2 >= 3.4.0; older versions call it `size`.)
  geom_line(linewidth = 2) +
  scale_y_continuous(limits = c(0, 50000000), labels = comma) +
  labs(x = "Year", y = "Population")
```