-
Notifications
You must be signed in to change notification settings - Fork 2
/
app.R
71 lines (52 loc) · 1.44 KB
/
app.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
library(caret)
library(e1071)
# Load the data set ----
filename <- "/Users/sumit/Documents/major_work/TestingAndTrainingDataSet.csv"
testing_0 <- read.csv(file = filename, header = TRUE)

# Show the raw data set.
testing_0

# Scatter plot of pH against Aval.N; every point is labelled with its `name`
# value, drawn to the right of the point (pos = 4) at half size.
plot(pH ~ Aval.N, data = testing_0)
with(
  testing_0,
  text(pH ~ Aval.N, data = testing_0, labels = name, pos = 4, cex = 0.5)
)
# TODO: 3D scatter plot still pending.

# Drop the first column before clustering
# (presumably the non-numeric `name` label column -- confirm against the CSV).
testing_1 <- testing_0[-1]
testing_1

# Normalise the data set ----
# Centre every column on its mean and divide by its standard deviation.
m <- apply(testing_1, MARGIN = 2, FUN = mean)
s <- apply(testing_1, MARGIN = 2, FUN = sd)
testing_1 <- scale(testing_1, center = m, scale = s)
testing_1
print(testing_1, digits = 3)
#elbow curve
#' Plot an elbow curve of the within-groups sum of squares for k-means.
#'
#' For k = 1 the total within-groups sum of squares is computed directly from
#' the column variances; for every k in 2..nc, `kmeans()` is run with `k`
#' centres and the per-cluster `withinss` values are summed. The resulting
#' curve is drawn as a point-and-line plot.
#'
#' @param data Numeric matrix or data frame, one observation per row.
#' @param nc Largest number of clusters to evaluate (a single number >= 1).
#' @param seed Value passed to `set.seed()` before every `kmeans()` run, so
#'   each k starts from the same RNG state. Note that this resets the
#'   caller's global RNG state.
#' @return Invisibly, the numeric vector of within-groups sums of squares,
#'   where element `k` belongs to `k` clusters.
wssplot <- function(data, nc = 15, seed = 1234) {
  stopifnot(is.numeric(nc), length(nc) == 1, !is.na(nc), nc >= 1)
  nc <- as.integer(nc)

  # Preallocate instead of growing the vector inside the loop.
  wss <- numeric(nc)
  wss[1] <- (nrow(data) - 1) * sum(apply(data, 2, var))

  # seq_len(nc)[-1] is empty when nc == 1; `2:nc` would count down (2, 1)
  # and leave `wss` longer than the x axis, making plot() fail.
  for (k in seq_len(nc)[-1]) {
    set.seed(seed)
    wss[k] <- sum(kmeans(data, centers = k)$withinss)
  }

  plot(seq_len(nc), wss, type = "b", xlab = "Number of clusters",
       ylab = "Within Groups Sum of Squares")
  invisible(wss)
}
# Elbow curve for 1 to 19 clusters on the normalised data.
wssplot(data = testing_1, nc = 19, seed = 1234)

# k-means ----
# Partition the normalised data into three clusters and show the fit.
testing_kmeans <- kmeans(testing_1, centers = 3)
testing_kmeans

# 3D scatter plot of columns 2 to 4, coloured by k-means cluster.
library("scatterplot3d")
scatterplot3d(testing_1[, 2:4], color = testing_kmeans$cluster)

# Pairwise distances between the rows of the normalised data.
distance <- dist(testing_1)
print(distance, digits = 3)

# Cluster dendrograms ----
# Build both trees first: default linkage for hc.c, average linkage for hc.a.
hc.c <- hclust(distance)
hc.a <- hclust(distance, method = "average")

# Complete-linkage tree, then average-linkage tree, labelled by `name`.
plot(hc.c, labels = testing_0$name)
plot(hc.a, labels = testing_0$name)

# Cluster membership ----
# Cut both trees into three groups and cross-tabulate the assignments.
member.c <- cutree(hc.c, k = 3)
member.a <- cutree(hc.a, k = 3)
# plot(member.c, member.a)
table(member.c, member.a)
# summary(testing_1)

# Cluster means: per-column mean within each complete-linkage group.
aggregate(testing_1, by = list(member.c), FUN = mean)

# Silhouette plot for the three-group cut of the complete-linkage tree.
library(cluster)
plot(
  silhouette(cutree(hc.c, 3), distance)
)