forked from H3nrycrosby/riaa_scraping_project
-
Notifications
You must be signed in to change notification settings - Fork 0
/
R_riaa.Rmd
58 lines (49 loc) · 1.31 KB
/
R_riaa.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
---
title: "R Notebook"
output: html_notebook
---
```{r}
library(ggplot2)
library(gridExtra)
library(tabplot)
library(lsr)
library(corrplot)
library(dplyr)
# Load the scraped RIAA data from the project directory.
# The path is spelled out with file.path() rather than switching directories
# with setwd(): a working-directory change made inside a notebook chunk does
# not persist once the chunk has finished, and it silently alters global state
# for anyone running the notebook interactively.
riaa_dir <- "~/riaa_proj"
riaa <- read.csv(
  file.path(riaa_dir, "riaa_datetimes.csv"),
  stringsAsFactors = FALSE
)[, 2:16]  # keep columns 2-16 (column 1 is presumably a saved row index -- TODO confirm)
```
```{r}
# Column-by-column overview of the loaded data frame.
riaa %>% summary()
```
```{r}
## Table plots of all features, with rows sorted on Sales (descending).
# NOTE: plyr is deliberately not attached in this chunk. Attaching plyr after
# dplyr masks dplyr's summarise()/summarize()/arrange()/mutate(), which breaks
# the grouped summaries further down; nothing in this chunk needs plyr.

# Keep rows whose Group_Type is not 'None' and whose Genre is one of the four
# genres of interest, ordered by Genre.
genres_of_interest <- c("ALTERNATIVE", "POP", "ROCK", "R&B/HIP HOP")
riaa <- riaa %>%
  dplyr::filter(Group_Type != "None", Genre %in% genres_of_interest) %>%
  dplyr::arrange(Genre)
#riaa <- riaa[743:3590,]

# Plot the features in batches of at most 8 columns, each shown next to Sales.
# split() is used instead of matrix(..., nrow = 8) because the number of
# feature columns need not be a multiple of 8: matrix() would warn and recycle
# the first column names into the last batch, plotting them twice.
feature_cols <- setdiff(names(riaa), "Sales")
batch_size <- 8
col_batches <- split(
  feature_cols,
  ceiling(seq_along(feature_cols) / batch_size)
)
for (batch in col_batches) {
  tableplot(
    riaa,
    select_string = c(batch, "Sales"),
    sortCol = "Sales", decreasing = TRUE,
    nBins = 30
  )
}
```
```{r}
# One-way ANOVA of Sales against each grouping feature.
# (Assumes Sales is numeric -- TODO confirm against the CSV.)
summary(aov(Sales ~ Genre, data = riaa))
summary(aov(Sales ~ Media_Type, data = riaa))
summary(aov(Sales ~ Format_Type, data = riaa))

# Association between Media_Type and Format_Type.
# aov() needs a numeric response, so the relationship between these two
# features is tested with a chi-squared test of independence on their
# contingency table instead.
# (Assumes both columns are categorical/character -- TODO confirm.)
chisq.test(table(riaa$Media_Type, riaa$Format_Type))
```
```{r}
# Chi-squared test of independence between the Spotify column and Genre.
# table() builds the Spotify x Genre contingency table of row counts directly:
# combinations that never occur get a count of 0 (rather than NA, which
# chisq.test() rejects), the row/column labels come along as dimnames, and no
# reshaping package has to be attached.
x <- table(Spotify = riaa$Spotify, Genre = riaa$Genre)
chisq.test(x)
```
```{r}
library(dplyr)
# Number of rows per Genre.
# The verbs are namespaced with dplyr:: because calling library(dplyr) again
# does not move an already attached dplyr back in front of plyr on the search
# path; if plyr was attached later, a bare summarise() would resolve to plyr's
# version, which ignores the grouping.
riaa %>%
  dplyr::group_by(Genre) %>%
  dplyr::summarise(count = dplyr::n())
```