-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path4-build-models.R
116 lines (85 loc) · 3.74 KB
/
4-build-models.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
# Install caret and all of its dependencies:
#install.packages('caret', dependencies = c("Depends", "Suggests"))
library(caret)
# Collect the code of every caret model flagged as usable for classification
m <- with(modelLookup(), unique(model[forClass]))
length(m)
# Methods that are slow or fail; they are excluded from the run
# ("AdaBag" appears twice in the list, which is harmless)
removeModels <- c(
  "AdaBag", "AdaBoost.M1", "bam", "pda2", "dwdRadial", "rbf",
  "dwdLinear", "dwdPoly", "gaussprLinear", "gaussprPoly",
  "rFerns", "sddaLDA", "smda", "sddaQDA", "xgbLinear", "xgbTree",
  "AdaBag", "FH.GBML", "ORFsvm", "ownn", "vbmpRadial", "SLAVE",
  "ORFlog", "GFS.GCCL", "ORFpls", "snn", "bagEarth", "ORFridge",
  "rmda", "awnb", "awtan", "manb", "nbDiscrete", "nbSearch",
  "ordinalNet", "blackboost", "tan", "tanSearch", "randomGLM",
  "Rborist", "FRBCS.W", "FRBCS.CHI", "evtree", "bstTree",
  "bagEarthGCV", "bagFDA", "rrlda", "rfRules", "rpartScore",
  "adaptDA", "CHAID", "sparsediscrim", "elmNN", "FCNN4R",
  "mxnet", "PRIM", "adaboost", "RRF", "amdai", "chaid", "dda",
  "elm", "hdrda", "mlpSGD", "msaenet", "mxnetAdam", "rlda"
)
# Keep only the model codes that are not on the exclusion list
m <- setdiff(m, removeModels)
length(m)
m
# Pre-load the packages behind the selected models (best effort only).
# The entries of `m` are caret model codes, not package names, so the packages
# are taken from each model's `library` entry in getModelInfo() instead of
# passing the codes straight to require().
modelPkgs <- unique(unlist(
  lapply(getModelInfo()[m], function(info) info$library)
))
suppressPackageStartupMessages(
  ll <- lapply(modelPkgs, function(pkg) {
    # require() returns FALSE (with a warning) when a package is missing;
    # the warning is replaced by the single summary message below.
    suppressWarnings(require(pkg, character.only = TRUE))
  })
)
names(ll) <- modelPkgs
# Report the packages that could not be loaded so they can be installed
missingPkgs <- modelPkgs[!vapply(ll, isTRUE, logical(1))]
if (length(missingPkgs) > 0) {
  message("Packages not available: ", paste(missingPkgs, collapse = ", "))
}
# Show which libraries are loaded
sessionInfo()
###################################################################################################
# Inputs for the train function:
#   X - every column of `train` except Survived
#   Y - the Survived column
X <- subset(train, select = -Survived)
Y <- train$Survived
# This function actually calls caret::train
# Train a single caret model on the global X / Y data.
#
# Args:
#   i: caret method code (character scalar), e.g. one element of `m`.
#
# Returns:
#   The object returned by train() on success, or NULL when train() raises an
#   error. Returning NULL lets the caller drop failed methods afterwards.
trainCall <- function(i) {
  cat("----------------------------------------------------\n")
  cat(date(), "\n", i, "\n")
  # Reset the RNG before every fit so each run is reproducible
  set.seed(123)
  tryCatch(
    train(
      x = X,
      y = as.factor(Y),
      method = i,
      trControl = trainControl(method = "cv")
    ),
    error = function(e) {
      # Report why the method failed instead of silently discarding the reason
      message("Method '", i, "' failed: ", conditionMessage(e))
      NULL
    }
  )
}
###################################################################################################
# Register parallel front-end
library(doParallel)
# detectCores() may return NA when the core count cannot be determined;
# fall back to a single worker in that case.
cl <- makeCluster(max(1L, detectCores(), na.rm = TRUE))
registerDoParallel(cl)
# Use lapply to run training, required for the tryCatch error handler in
# trainCall to work
# Will probably run for an hour or two
# Add slow/failing methods to removeModels list above
# Check periodically for install dependency messages
# `finally` stops the cluster and registers the sequential front end even if
# the run is interrupted or fails part-way, so no worker processes are left
# behind.
tryCatch(
  system.time(t3 <- lapply(m, trainCall)), # Approx. 1 hour on old dual core laptop
  finally = {
    stopCluster(cl)
    registerDoSEQ()
  }
)
###################################################################################################
# Remove NULL values, we only allow successful methods.
# vapply() always yields a logical vector, so the filter also works when t3 is
# empty (sapply() returns list() there, and negating a list is an error).
t4 <- t3[!vapply(t3, is.null, logical(1))]
length(t3)
length(t4)
# Create training performance summary
# Build a performance summary table for a list of fitted caret models.
#
# Args:
#   t4: list of objects returned by train(); may be empty.
#
# Returns:
#   A data.frame with one row per model and the columns
#   method, accuracy, kappa, runtime (secs) and longname.
#   An empty list gives a zero-row data.frame with the same columns.
trainSummary <- function(t4) {
  # Query getTrainPerf() once per model and reuse it for both metrics
  perf <- lapply(t4, getTrainPerf)
  # USE.NAMES = FALSE keeps the default 1..n row names even if t4 is named,
  # so row numbers of the result stay usable as indices into t4.
  data.frame(
    method = vapply(
      t4, function(fit) fit$method,
      character(1), USE.NAMES = FALSE
    ),
    accuracy = vapply(
      perf, function(p) as.numeric(round(p$TrainAccuracy, 4)),
      numeric(1), USE.NAMES = FALSE
    ),
    kappa = vapply(
      perf, function(p) as.numeric(round(p$TrainKappa, 4)),
      numeric(1), USE.NAMES = FALSE
    ),
    # Run time (secs): third entry of the model's times$everything
    runtime = vapply(
      t4, function(fit) as.numeric(fit$times$everything[3]),
      numeric(1), USE.NAMES = FALSE
    ),
    # Long model name
    longname = vapply(
      t4, function(fit) fit$modelInfo$label,
      character(1), USE.NAMES = FALSE
    ),
    stringsAsFactors = FALSE
  )
}
# Summarise all successfully trained models and show the top 20 by
# accuracy, by kappa, and by shortest run time
df1 <- trainSummary(t4)
head(df1[order(-df1$accuracy), ], 20)
head(df1[order(-df1$kappa), ], 20)
head(df1[order(df1$runtime), ], 20)
# Row numbers from df1 can be used to index individual method training results.
# which() gives the row position directly instead of parsing row names.
xgbDART.rownum <- which(df1$method == "xgbDART")
if (length(xgbDART.rownum) == 1L) {
  # Explicit print(): values are not auto-printed inside an if block
  print(confusionMatrix(t4[[xgbDART.rownum]]))
  xgbDART.fit <- t4[[xgbDART.rownum]]$finalModel
} else {
  # xgbDART is absent when its training failed and was dropped from t4
  warning("xgbDART is not among the trained models; no fit extracted",
          call. = FALSE)
}