Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rborist hard crashes R under caret #36

Open
tobigithub opened this issue Jul 21, 2016 · 0 comments
Open

Rborist hard crashes R under caret #36

tobigithub opened this issue Jul 21, 2016 · 0 comments

Comments

@tobigithub
Copy link
Owner

Hard crash with "R for Windows has stopped working"

# Use of all 160 caret models for binary classification and diabetes set
# The output from fast (working) binary classification models is
# exported to a sortable table in a web browser using the DT library
# https://github.com/tobigithub/caret-machine-learning
# Tobias Kind (2015)

# use mlbench, caret and DT library
require(mlbench)
require(caret)
require(DT)

# Load the Pima Indians diabetes data (768 rows x 9 columns) and echo its size
data(PimaIndiansDiabetes)
dim(PimaIndiansDiabetes)

# Collect the names of all caret models flagged as usable for classification.
# The lookup table is fetched once and kept local to this expression.
m <- local({
  lookup <- modelLookup()
  unique(lookup[lookup$forClass, 1])
})
length(m)
m

# Models excluded from the run because they are slow or problematic
# ("rbf" crashes; "dwdLinear", "ownn", "snn" have issues).
# Other models may simply have failed and are not listed here.
removeModels <- c(
  "AdaBag", "AdaBoost.M1", "FH.GBML", "pda2", "PenalizedLDA",
  "GFS.GCCL", "rbf", "RFlda", "nodeHarvest", "ORFsvm",
  "dwdLinear", "dwdPoly", "gam", "gaussprLinear", "ownn",
  "sddaLDA", "sddaQDA", "SLAVE", "smda", "snn",
  "rmda", "rFerns", "wsrf", "ordinalNet", "awnb",
  "awtan", "manb", "nbDiscrete", "nbSearch", "tan",
  "tanSearch", "bartMachine", "randomGLM", "Rborist"
)

# Keep only the models that are not on the exclusion list
m <- m[is.na(match(m, removeModels))]

# Override: restrict the run to these two models; this replaces the
# filtered list computed above.
m <- c("rf", "Rborist")

# Try to attach a package for every entry of m up front (does not really
# work due to other dependencies). require() yields FALSE instead of
# stopping when a name cannot be attached; the per-entry results go to ll.
ll <- suppressPackageStartupMessages(
  lapply(m, require, character.only = TRUE)
)

# Report which libraries ended up loaded
sessionInfo()

# Predictors (columns 1-8) and response for the first 40 rows; these globals
# are what the train() calls below receive as x and y.
X <- PimaIndiansDiabetes[seq_len(40), seq_len(8)]
Y <- PimaIndiansDiabetes[["diabetes"]][seq_len(40)]

# Start one worker per detected core and register it as the parallel backend
library(doParallel)
cl <- makeCluster(detectCores())
registerDoParallel(cl)

# Warm-up fit: without it the first benchmarked method gets a wrong timing
warmup <- train(
  x = X,
  y = Y,
  method = "rf",
  trControl = trainControl(method = "boot632")
)

# Fit one caret model on the global X / Y with minimal error handling.
#
# i: caret method name (character scalar) handed to train() as `method`.
#
# Returns the fitted train object, or NULL when train() signals an R error.
# The failure reason is reported via message() so that a failed model can be
# told apart from one that was never attempted. Note that tryCatch() only
# intercepts R-level conditions; a crash inside native code still takes the
# whole R session down.
trainCall <- function(i) {
  cat("----------------------------------------------------","\n")
  # Same seed before every fit so resampling is comparable across models
  set.seed(123)
  cat(i," <- loaded\n")
  tryCatch(
    train(x = X, y = Y, method = i,
          trControl = trainControl(method = "boot632")),
    error = function(e) {
      message(i, " <- failed: ", conditionMessage(e))
      NULL
    }
  )
}

# Fit every model in m through trainCall; going through lapply keeps the
# per-model tryCatch handling in effect.
t2 <- lapply(m, trainCall)

# Drop the NULL entries left by failed fits so only successful models remain.
# After this step positions in t2 no longer correspond to positions in m.
failed <- vapply(t2, is.null, logical(1))
t2 <- t2[!failed]

# Print one summary line for the i-th successfully trained model in t2:
# method name (left-aligned, width 22), training accuracy, training kappa
# and the elapsed time, with minimal error handling.
# TrainKappa can sometimes be zero while the accuracy is still available;
# see http://epiville.ccnmtl.columbia.edu/popup/how_to_calculate_kappa.html
#
# i: index into the global list t2.
#
# Returns NULL in every case; any error (e.g. an out-of-range index) is
# caught and nothing is printed for that entry.
#
# The label is read from the fitted object itself rather than from m[i]:
# failed models have already been removed from t2, so m and t2 are not
# aligned by position and m[i] could name the wrong model.
printCall <- function(i) {
  tryCatch(
    {
      # Gather everything first so a failure leaves no half-written line
      fit <- t2[[i]]
      perf <- getTrainPerf(fit)
      elapsed <- fit$times$everything[3]
      cat(sprintf("%-22s", fit$method))
      cat(round(perf$TrainAccuracy, 4), "\t")
      cat(round(perf$TrainKappa, 4), "\t")
      cat(elapsed, "\n")
    },
    error = function(e) NULL
  )
}

# Print one summary line per successfully trained model. seq_along() yields
# an empty sequence when every model failed, whereas 1:length(t2) would
# produce c(1, 0) and call printCall with two invalid indexes.
r2 <- lapply(seq_along(t2), printCall)

# stop cluster and register sequential front end
stopCluster(cl)
registerDoSEQ()

# Number of successfully trained models; used below as the table page length
MAX <- length(t2)

# Extract one value per fitted model. vapply() enforces the element type and
# returns zero-length vectors when t2 is empty, where an index loop over
# 1:length(t2) would fail on t2[[1]].
x1 <- vapply(t2, function(fit) fit$method,
             character(1), USE.NAMES = FALSE)            # Name
x2 <- vapply(t2, function(fit) {
  as.numeric(round(getTrainPerf(fit)$TrainAccuracy, 4))
}, numeric(1), USE.NAMES = FALSE)                        # Accuracy
x3 <- vapply(t2, function(fit) {
  as.numeric(round(getTrainPerf(fit)$TrainKappa, 4))
}, numeric(1), USE.NAMES = FALSE)                        # Kappa
x4 <- vapply(t2, function(fit) as.numeric(fit$times$everything[3]),
             numeric(1), USE.NAMES = FALSE)              # time [s]
x5 <- vapply(t2, function(fit) fit$modelInfo$label,
             character(1), USE.NAMES = FALSE)            # long model name

# Assemble the per-model vectors into one table; columns keep the names
# x1..x5 and strings stay character rather than becoming factors.
df1 <- data.frame(
  x1 = x1,
  x2 = x2,
  x3 = x3,
  x4 = x4,
  x5 = x5,
  stringsAsFactors = FALSE
)

# Show the full result table in the R console
df1

# plot models, just as example
# ggplot(t2[[1]])
# ggplot(t2[[1]])

# Render df1 as an interactive table with readable column headers:
# every column left-aligned, all rows on one page, sorted by the Accuracy
# column in descending order, numeric columns shown with 3 decimals and a
# bar drawn behind the Accuracy values.
datatable(
  df1,
  options = list(
    columnDefs = list(
      list(className = "dt-left", targets = c(0, 1, 2, 3, 4, 5))
    ),
    pageLength = MAX,
    order = list(list(2, "desc"))
  ),
  colnames = c("Num", "Name", "Accuracy", "Kappa", "time [s]", "Model name"),
  caption = paste("Classification results from caret models", Sys.time()),
  class = "cell-border stripe"
) %>%
  formatRound("x2", 3) %>%
  formatRound("x3", 3) %>%
  formatRound("x4", 3) %>%
  formatStyle(
    2,
    background = styleColorBar(x2, "steelblue"),
    backgroundSize = "100% 90%",
    backgroundRepeat = "no-repeat",
    backgroundPosition = "center"
  )


### END
R version 3.3.1 (2016-06-21)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 7 x64 (build 7601) Service Pack 1

locale:
[1] LC_COLLATE=English_United States.1252 
[2] LC_CTYPE=English_United States.1252   
[3] LC_MONETARY=English_United States.1252
[4] LC_NUMERIC=C                          
[5] LC_TIME=English_United States.1252    

attached base packages:
 [1] parallel  splines   grid      stats     graphics  grDevices utils    
 [8] datasets  methods   base     

other attached packages:
 [1] plyr_1.8.4          wsrf_1.5.47         spls_2.2-1         
 [4] sparseLDA_0.1-7     sdwd_1.0.2          sda_1.3.7          
 [7] fdrtool_1.2.15      corpcor_1.6.8       rrlda_1.1          
[10] matrixcalc_1.0-3    glasso_1.8          mvoutlier_2.0.6    
[13] sgeostat_1.0-27     pcaPP_1.9-60        RRF_1.6            
[16] rpartScore_1.0-1    rotationForest_0.1  rocc_1.2           
[19] ROCR_1.0-7          gplots_3.0.1        Rborist_0.1-1      
[22] Rcpp_0.12.5         randomGLM_1.02-1    doParallel_1.0.10  
[25] MASS_7.3-45         protoclass_1.0      pls_2.5-0          
[28] partDSA_0.9.10      ordinalNet_1.4      oblique.tree_1.1.1 
[31] tree_1.0-37         nodeHarvest_0.7-3   nnet_7.3-12        
[34] mda_0.4-8           class_7.3-14        kknn_1.3.1         
[37] hda_0.2-14          gpls_1.44.0         glmnet_2.0-5       
[40] gbm_2.1.1           survival_2.39-4     gam_1.12           
[43] fda_2.4.4           Matrix_1.2-6        extraTrees_1.0.5   
[46] evtree_1.0-0        partykit_1.0-5      earth_4.4.4        
[49] plotmo_3.1.4        TeachingDemos_2.10  plotrix_3.6-2      
[52] deepboost_0.1.4     Boruta_5.0.0        ranger_0.5.0       
[55] binda_1.0.3         entropy_1.2.1       bartMachine_1.2.3  
[58] missForest_1.4      itertools_0.1-3     iterators_1.0.8    
[61] foreach_1.4.3       randomForest_4.6-12 car_2.1-2          
[64] bartMachineJARs_1.0 rJava_0.9-8         ada_2.0-5          
[67] rpart_4.1-10        DT_0.1              caret_6.0-70       
[70] ggplot2_2.1.0       lattice_0.20-33     mlbench_2.1-1      

loaded via a namespace (and not attached):
 [1] minqa_1.2.4           colorspace_1.2-6      MatrixModels_0.4-1   
 [4] cvTools_0.3.2         mvtnorm_1.0-5         codetools_0.2-14     
 [7] sROC_0.1-2            robustbase_0.92-6     nloptr_1.0.4         
[10] robCompositions_2.0.0 pbkrtest_0.4-6        cluster_2.0.4        
[13] compiler_3.3.1        rrcov_1.3-11          lars_1.2             
[16] htmltools_0.3.5       quantreg_5.26         tools_3.3.1          
[19] igraph_1.0.1          gtable_0.2.0          reshape2_1.4.1       
[22] gdata_2.17.0          nlme_3.1-128          lmtest_0.9-34        
[25] laeken_0.4.6          stringr_1.0.0         lme4_1.1-12          
[28] gtools_3.5.0          DEoptimR_1.0-6        zoo_1.7-13           
[31] scales_0.4.0          VIM_4.5.0             SparseM_1.7          
[34] elasticnet_1.1        reshape_0.8.5         stringi_1.1.1        
[37] e1071_1.6-7           caTools_1.17.1        boot_1.3-18          
[40] chron_2.3-47          bitops_1.0-6          htmlwidgets_0.6      
[43] GGally_1.2.0          magrittr_1.5          mgcv_1.8-12          
[46] sp_1.2-3              KernSmooth_2.23-15    data.table_1.9.6     
[49] vcd_1.4-1             digest_0.6.9          stats4_3.3.1         
[52] munsell_0.4.3         quadprog_1.5-5       
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant