##############################################################################
### In this tutorial, we will use neural network models to predict alcohol ###
### using some of the other variables in the wine quality dataset.         ###
##############################################################################
library(nnet) # Fits neural net models
set.seed(74364139)
source("Helper Functions.R")
source("Read Wine Data.R")
#########################################################################
### Fitting neural network models is a bit more involved than we're   ###
### used to. The algorithm can get stuck in locally optimal solutions ###
### (see mammatus cloud example), so we have to re-fit a bunch of     ###
### times on the same data and choose the fit that looks best.        ###
#########################################################################
### 75%/25% split into training and validation sets
n = nrow(data)
n.train = floor(n*0.75)
n.valid = n - n.train
inds = c(rep(1, times = n.train), rep(2, times = n.valid))
inds.rand = inds[sample.int(n)]
data.train = data[inds.rand == 1,]
X.train.raw = data.train[,-5]
Y.train = data.train[,5]
data.valid = data[inds.rand == 2,]
X.valid.raw = data.valid[,-5]
Y.valid = data.valid[,5]
### When fitting neural networks, it's best to rescale the training set so
### that every predictor lies between 0 and 1. We then have to apply the
### same scaling (the training set's min and max) to the validation set, so
### some validation observations may land below 0 or above 1. This is fine.
### We can use Haolun's function, which rescales each column of x1 using the
### min and max of the corresponding column of x2, so that x2's min and max
### map to 0 and 1 respectively.
rescale <- function(x1, x2){
  for(col in 1:ncol(x1)){
    a <- min(x2[,col])
    b <- max(x2[,col])
    x1[,col] <- (x1[,col] - a)/(b - a)
  }
  x1
}
X.train = rescale(X.train.raw, X.train.raw)
X.valid = rescale(X.valid.raw, X.train.raw) # Be careful with the order
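### A quick sanity check (optional): after rescaling, every training column
### should run from exactly 0 to 1, while validation columns may fall a bit
### outside that range, as discussed above.
apply(X.train, 2, range)
apply(X.valid, 2, range)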
### Before we can fit the model, we need to choose tuning parameters. For
### now, let's just use 2 hidden nodes and shrinkage of 0.01.
n.hidden = 2
shrink = 0.01
### Now we are ready to fit a neural network, using the nnet() function
### from the nnet package. This function has a few inputs that we need
### to include. First, it uses x,y syntax, rather than model formula syntax.
### Next, we can set the number of hidden nodes using "size", and the amount
### of shrinkage using "decay". We also need to specify that we want
### continuous predictions (instead of categorical) by setting linout to TRUE.
### Finally, we have to tell nnet() the maximum number of iterations we will
### allow it to use internally to try to optimize the model. This input
### is called maxit, and you can usually just set it to 500.
fit.nnet = nnet(y = Y.train, x = X.train, linout = TRUE, size = n.hidden,
                decay = shrink, maxit = 500)
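### It's worth a quick look at the fitted object (optional). The $value
### component is the final value of the fitting criterion (SSE plus the
### decay penalty), and summary() prints the network structure and weights.
fit.nnet$value
summary(fit.nnet)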
### Remember that we need to fit multiple models and choose the best one
### so we can avoid the local minimum problem. Let's try 20 models. We
### will need to save the SSE of each one, as well as the models themselves
### so we can go back and recover the winner.
n.nnets = 20 # Number of times to re-fit
### Container for SSEs
all.SSEs = rep(0, times = n.nnets)
### Container for models.
### Note: I have used a list here, because a list is the data type in R that
### can hold complicated objects like fitted models. vector(mode = "list")
### is the idiomatic way to pre-allocate an empty list of a given length.
all.nnets = vector(mode = "list", length = n.nnets)
for(i in 1:n.nnets){
  ### Fit model. We can set the input "trace" to FALSE to suppress the
  ### printed output from the nnet() function.
  this.nnet = nnet(y = Y.train, x = X.train, linout = TRUE, size = n.hidden,
                   decay = shrink, maxit = 500, trace = FALSE)

  ### Get the model's SSE
  this.SSE = this.nnet$value

  ### Store results. We have to use double square brackets when storing or
  ### retrieving from a list.
  all.SSEs[i] = this.SSE
  all.nnets[[i]] = this.nnet
}
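### Optional: look at the spread of SSEs across the re-fits. If every run
### reached the same optimum these would all be (nearly) equal, so any
### spread is the local-minimum problem in action.
round(all.SSEs, 2)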
### Get the best model: the one with the smallest SSE. Retrieving it from
### the list again needs double square brackets.
ind.best = which.min(all.SSEs)
fit.nnet.best = all.nnets[[ind.best]]
### Get predictions and MSPE
pred.nnet = predict(fit.nnet.best, X.valid)
MSPE.nnet = get.MSPE(Y.valid, pred.nnet)
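### Report the validation-set error of this un-tuned fit; we will get tuned
### competitors to compare against in the next section.
print(MSPE.nnet)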
###########################################################################
### So far, so good, but we haven't done any tuning yet. Let's use      ###
### 5-fold CV to choose between 1/5/9 hidden nodes, and 0/1/5/10        ###
### shrinkage. To avoid bad local minima, re-fit each model 20 times    ###
###########################################################################
M = 20 # Number of times to re-fit each model
### Define parameter values and use expand.grid() to get all combinations
all.n.hidden = c(1, 5, 9)
all.shrink = c(0, 1, 5, 10)
all.pars = expand.grid(n.hidden = all.n.hidden,
                       shrink = all.shrink)
n.pars = nrow(all.pars) # Number of parameter combinations
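### Optional: print the grid to see all 12 combinations expand.grid() built.
### Each row pairs one value of n.hidden with one value of shrink.
all.pars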
K = 5 # Number of folds
### Create folds
folds = get.folds(n, K)
### Create container for MSPEs
CV.MSPEs = array(0, dim = c(K, n.pars))
for(i in 1:K){
  ### Print progress update
  print(paste0(i, " of ", K))

  ### Split data and rescale predictors
  data.train = data[folds != i,]
  X.train.raw = data.train[,-5]
  X.train = rescale(X.train.raw, X.train.raw)
  Y.train = data.train[,5]
  data.valid = data[folds == i,]
  X.valid.raw = data.valid[,-5]
  X.valid = rescale(X.valid.raw, X.train.raw)
  Y.valid = data.valid[,5]

  ### Fit neural net models for each parameter combination. A second
  ### for loop will make our life easier here
  for(j in 1:n.pars){
    ### Get current parameter values
    this.n.hidden = all.pars[j,1]
    this.shrink = all.pars[j,2]

    ### We need to run nnet() multiple times to avoid bad local minima.
    ### Create containers to store the models and their errors.
    all.nnets = vector(mode = "list", length = M)
    all.SSEs = rep(0, times = M)

    ### We need to fit each model multiple times. This calls for another
    ### for loop.
    for(l in 1:M){
      ### Fit model
      fit.nnet = nnet(X.train, Y.train, linout = TRUE, size = this.n.hidden,
                      decay = this.shrink, maxit = 500, trace = FALSE)

      ### Get model SSE
      SSE.nnet = fit.nnet$value

      ### Store model and its SSE
      all.nnets[[l]] = fit.nnet
      all.SSEs[l] = SSE.nnet
    }

    ### Get best fit using current parameter values
    ind.best = which.min(all.SSEs)
    fit.nnet.best = all.nnets[[ind.best]]

    ### Get predictions and MSPE, then store MSPE
    pred.nnet = predict(fit.nnet.best, X.valid)
    MSPE.nnet = get.MSPE(Y.valid, pred.nnet)
    CV.MSPEs[i, j] = MSPE.nnet # Be careful with indices for CV.MSPEs
  }
}
### We can now make an MSPE boxplot. It would be nice to have more
### informative names though. We can construct names from all.pars
### using the paste0() function.
names.pars = paste0(all.pars$n.hidden, ",", all.pars$shrink)
colnames(CV.MSPEs) = names.pars
### Make boxplot
boxplot(CV.MSPEs, las = 2, main = "MSPE Boxplot")
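### Optional numeric companion to the boxplot: average the MSPEs over folds
### to get a single summary number per parameter combination.
round(colMeans(CV.MSPEs), 4)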
### Get relative MSPEs and make boxplot
CV.RMSPEs = apply(CV.MSPEs, 1, function(W) W/min(W))
CV.RMSPEs = t(CV.RMSPEs)
boxplot(CV.RMSPEs, las = 2, main = "RMSPE Boxplot")
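### Optional: one reasonable way to pick a winner is the combination with
### the lowest average CV MSPE. The boxplots may suggest a different choice
### if some combinations are much more variable than others.
ind.best.pars = which.min(colMeans(CV.MSPEs))
names.pars[ind.best.pars]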