-
Notifications
You must be signed in to change notification settings - Fork 0
/
missR.R
174 lines (142 loc) · 5.28 KB
/
missR.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
# Helper functions for the face analysis of Miss Finland Beauty competition
# Tuomo Nieminen 2016
# Image visualisation
# ------------------
# draw a face
drawFace <- function(face, newdev=F, main = rownames(face)) {
##Draws the argument face in greyscale.
##
##Args:
## face: a vector containing the face as a greyscale image.
## newdev: open a new device?
m <- as.numeric(face)
dim <- sqrt(length(m))
m<- matrix(m, dim, dim)
m <- t(m[dim:1, ])
if(newdev) dev.new()
par(mar = c(0,0.5,2,0))
image(m, col=grey(seq(0, 1, length=256)), main=main, cex.main = 0.8)
}
# draw multiple faces
drawMultipleFaces <- function(faces, dev = NULL, titles = rownames(faces), cex = 1) {
# Applies drawFace() to each row of faces
##Args:
## faces: a matrix or data frame with greyscale faces as rows.
nfaces <- nrow(faces)
pardim <- ceiling(sqrt(nfaces))
if(!is.null(dev)) dev()
temp_par <- par(mfrow=c(pardim,pardim), yaxt="n", xaxt="n", cex = cex)
i <- 1
for(i in 1:nfaces) {
drawFace(faces[i,],main=titles[i])
i <- i + 1
}
par(temp_par)
}
# return n random faces
get_randomface <- function(faces, size=nrow(faces)) {
i <- sample(1:nrow(faces), size = size, replace = F)
faces[i,, drop = F]
}
# draw a sample of faces
drawSample<-function(faces, n=16, dev = NULL) {
todraw <- get_randomface(faces, size =n)
drawMultipleFaces(todraw, dev = dev)
}
# PCA
# ----------
# do pca on faces (TODO: could just use prcomp() instead ... done in index.Rmd)
faces.PCA <- function(faces) {
centered_faces = center_faces(faces)
covariance <- cov(centered_faces)
eigen(covariance, symmetric=T)
}
# PCA representation of a face
project_face <- function(face, PC, which = 1:2) {
V <- PC[,which]
P <- V%*%t(V)
t(P%*%face)
}
# draws n projected and original faces
compare_faces <- function(faces, PC, which = 1, n = 1) {
faces <- get_randomface(faces, size = n)
projected_faces <- t(apply(faces, 1, project_face, PC = PC, which = which))
both_faces <- matrix(NA, ncol = ncol(faces), nrow = n*2)
both_faces[((0:(n-1))*2) + 1,] <- faces
both_faces[((0:(n-1))*2) + 2,] <- projected_faces
rownames(both_faces) <- sapply(rownames(faces), rep, times = 2)
drawMultipleFaces(both_faces)
}
# Predicting and cross-validation
# ------------------------------
# Allows the estimation of predictive linear models for classification and regression,
# such as L1- or L2-regularized logistic regression
library(LiblineaR)
# test different logistic regression models
# on a grid of regularization costs and dimensions of eigenfaces
# returns a matrix of average correct predictions (see cross_validate below)
cross_validate_grid <- function(data, PC, C, FD, target="Kolme") {
results <- sapply(C, function(c) {
res <- sapply(FD, function(fd) {
cross_validate(data=data, PC = PC,cost = c, facedim = fd,type = 6, target = target)
})
# cat(100*which(C==c)/length(C),"% done \n")
res
})
results
}
# predicts the top3 of each competition year by using data from other years.
# returns average prct of correct predictions
cross_validate <- function(data, PC, cost, facedim, type=6, target="Kolme") {
nwinners <- ifelse(target=="Kolme",3,1)
V <- PC[,1:facedim]
faces <- as.matrix(data[,19:ncol(data)])
newfaces <- faces %*% V
X <- cbind(data[,c(2,8:18)], newfaces)
Y <- factor(data[[target]])
years <- unique(data$year)
pred <- sapply(years, function(year) {
train <- data$year != year
X_tr <- X[train,]; X_te <- X[!train,]
Y_tr <- Y[train]; Y_te <- Y[!train]
winners <- which(Y_te=="1")
if(length(winners)==0) {
return(c(correct = 0))
}
fit <- LiblineaR::LiblineaR(data=X_tr, target=Y_tr, type=type,
cost=cost, wi=c("0"=(10-nwinners)/10,"1"=nwinners/10))
pred <- predict(fit, newx = X_te, proba=T)
p7 <- pred$probabilities[,1]
# add comperitor indexes
p7 <- cbind(p7,1:nrow(X_te))
p7 <- p7[order(p7[,1]),]
# + 1 to make the predictions a little easier... this is a weird measure of accuracy, could be changed.
c(correct=sum(p7[,2][1:(nwinners+1)] %in% winners)/length(winners))
})
prc_correct <- sum(pred)/length(years)
prc_correct
}
# predict future results using previous data and several model parameters
# model parameters are chosen by cross validation (see above)
fit_l1_logreg <- function(tr_data, PC, costs, facedims, target_data, target="Kolme") {
nwinners <- ifelse(target=="Kolme",3,1)
Y <- tr_data[[target]]
X <- tr_data[,c(2,8:18), drop = F]
X_ <- target_data[,c(2,8:18)]
W <- matrix(0, ncol = ncol(X) + max(facedims) + 1, nrow = length(costs))
colnames(W) <- c(colnames(X), paste("PC", 1:max(facedims)), "bias")
i <- 0
probs <- mapply(function(c,fd) {
V <- PC[,1:fd, drop = F]
faces <- as.matrix(tr_data[,19:ncol(tr_data)]) %*% V
X_tr <- as.matrix(cbind(X, faces))
fit <- LiblineaR(data = X_tr, target = Y, type=6,
cost = c, wi = c("0" = (10 - nwinners) / 10, "1" = nwinners/10))
W[(i <- i + 1), 1:length(fit$W)] <<- fit$W
target_faces <- as.matrix(target_data[,19:ncol(target_data)]) %*% V
X_target <- cbind(X_, target_faces)
pred <- predict(fit, newx = as.matrix(X_target), proba = T)
pred$probabilities[,2]
}, costs, facedims)
return(list(weights = W, probs = probs))
}