-
Notifications
You must be signed in to change notification settings - Fork 0
/
.Rhistory
49 lines (49 loc) · 2.51 KB
/
.Rhistory
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# Packages this session depends on. Rcpp is installed but never attached
# directly, matching the original history.
required_pkgs <- c("dplyr", "magrittr", "Rcpp", "readr", "ggplot2", "plotly")

# Install only the packages that are not available yet, instead of
# re-downloading everything (magrittr three times) on every run.
missing_pkgs <- required_pkgs[
  !vapply(required_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}

# Attach in the same order as the original session so that function
# masking between packages is unchanged.
library(dplyr)
library(magrittr)
library(readr)
library(ggplot2)
library(plotly)
# Load the stroke dataset from the author's local Downloads folder.
# NOTE(review): no `na.strings` is supplied, so only read.csv's default
# missing-value marker is recognised -- confirm the CSV does not encode
# missing values differently.
df <- read.csv(
  file = "C:\\Users\\pshiv\\Downloads\\Assignment1 Sumer2022-1\\Assignment1 Sumer2022\\healthcare_stroke_dataset.csv",
  header = TRUE,
  sep = ","
)

# Show the first 5 rows of the dataset
head(df, n = 5)
# Task 3-a: bar plot of the number of records per gender, split by
# residence type, with the count printed above each bar.
print("Task 3-a:")
# The plotting package is ggplot2; there is no package called "ggplot",
# so the original library(ggplot) call failed.
library(ggplot2)
print("Gender count with residence type")
ggplot(data = df, aes(x = gender, fill = Residence_type)) +
  geom_bar(position = "dodge") +
  # Count labels are dodged as well so each one sits over its own bar;
  # vjust = -1 lifts the text above the bar top.
  geom_text(
    stat = "count",
    aes(label = ..count..),
    position = position_dodge(width = 0.9),
    vjust = -1
  )
# Labelling approach taken from:
# https://intellipaat.com/community/16343/how-to-put-labels-over-geombar-for-each-bar-in-r-with-ggplot2
# Task 4: relationship between age and BMI.
print("Task 4: Relationship between Age and BMI value:")

# Keep only the columns needed for this task, drop rows containing NA,
# then bin bmi and age into labelled ranges.
task_4_a <- df[c("id", "age", "heart_disease", "hypertension", "smoking_status", "bmi")] %>%
  na.omit() %>%
  mutate(
    bmi_data = cut(
      bmi,
      breaks = c(0, 18.5, 24.9, 29.9, Inf),
      labels = c("<=18.5 Underweight", "18.5-24.9 Normal", "25-29.9 Pre-obese", ">30 obese")
    ),
    age_data = cut(
      age,
      breaks = c(0, 18, 40, 60, 80, Inf),
      labels = c("<=18", "18-40", "40-60", "60-80", ">80")
    )
  )

library(ggplot2)
print("bmi range for each age group")

# Bar plot: one dodged bar per BMI class within each age group, with the
# record count written above every bar.
bar_plot <- ggplot(
  data = task_4_a,
  aes(x = age_data, fill = bmi_data, width = 1)
) +
  geom_bar(position = "dodge", color = "black") +
  scale_fill_manual(values = c("#97C1A9", "#9AB7D3", "#FFB8B1", "#55CBCD")) +
  geom_text(
    stat = "count",
    aes(label = ..count..),
    colour = "black",
    size = 3,
    position = position_dodge(width = 0.9),
    vjust = -1
  )

# Display the plot with a dark theme and titles
bar_plot +
  theme_dark() +
  labs(
    title = "BMI classification according to age group",
    subtitle = "Relationship by BMI range"
  )
# Task 4 scatter plot: bmi against age, points coloured by BMI class,
# with a black loess trend line drawn over the points.
scatter_plot <- ggplot(task_4_a, aes(x = age, y = bmi)) +
  geom_point(aes(color = factor(bmi_data)), size = 1) +
  scale_x_continuous(
    limits = c(0, max(task_4_a$age)),
    breaks = seq(0, max(task_4_a$age), 10)
  ) +
  # The y axis shows bmi, so its range must come from bmi. The original
  # reused max(age) here, which put every point with a bmi above the
  # largest age outside the axis limits.
  scale_y_continuous(
    limits = c(0, max(task_4_a$bmi)),
    breaks = seq(0, max(task_4_a$bmi), 10)
  ) +
  # `col` is an alias of `colour`; the original passed both with
  # different values. Only the explicit black line colour is kept.
  geom_smooth(
    formula = y ~ x,
    method = "loess",
    colour = "black",
    span = 0.5,
    se = FALSE,
    size = 1
  )

# Display the plot with a light theme, a manual palette for the four BMI
# classes, and titles (`subtitle` was misspelled `substitle`, so the
# subtitle was never applied).
scatter_plot +
  theme_light() +
  scale_color_manual(values = c("#CD978B", "#55CBCD", "#DFCCF1", "#97C1A9")) +
  labs(
    title = "Scatter Plot between age and bmi",
    subtitle = "Relationship by bmi range"
  )