From a69deea26d893d59cc94aac1107dd6acfd901492 Mon Sep 17 00:00:00 2001 From: Ziyu-Mu Date: Tue, 6 Aug 2024 12:46:57 +0200 Subject: [PATCH] overfitting table Signed-off-by: Ziyu-Mu --- .../rsrc/make_overfitting_table.R | 60 ------------------- .../regularization/rsrc/table_overfitting.R | 52 ++++++++++++++++ .../regularization/rsrc/table_overfitting.tex | 11 ++++ slides/regularization/slides-regu-intro.tex | 12 +--- 4 files changed, 64 insertions(+), 71 deletions(-) delete mode 100644 slides/regularization/rsrc/make_overfitting_table.R create mode 100755 slides/regularization/rsrc/table_overfitting.R create mode 100755 slides/regularization/rsrc/table_overfitting.tex diff --git a/slides/regularization/rsrc/make_overfitting_table.R b/slides/regularization/rsrc/make_overfitting_table.R deleted file mode 100644 index ddbc4cce..00000000 --- a/slides/regularization/rsrc/make_overfitting_table.R +++ /dev/null @@ -1,60 +0,0 @@ -# Load necessary libraries -library(MASS) -library(nnet) -library(e1071) -library(caret) -library(xtable) - -# Set seed for reproducibility -set.seed(123) - -# Load Boston housing dataset -data(Boston) - -# Split data into training and testing sets -index <- createDataPartition(Boston$medv, p = 0.7, list = FALSE) -train_set <- Boston[index, ] -test_set <- Boston[-index, ] - -# Preprocessing: Center and scale the data -preproc <- preProcess(train_set[, -14], method = c("center", "scale")) -train_set_preprocessed <- predict(preproc, train_set[, -14]) -test_set_preprocessed <- predict(preproc, test_set[, -14]) - -# Add the medv (median value) column back -train_set_preprocessed$medv <- train_set$medv -test_set_preprocessed$medv <- test_set$medv - - -# Define and train the overparameterized neural network -nn_model <- nnet(medv ~ ., data = train_set_preprocessed, size = 100, linout = TRUE, maxit = 20000, MaxNWts = 10000, decay = 0) - -# Define and train the SVM with a radial basis kernel -svm_model <- svm(medv ~ ., data = train_set_preprocessed, kernel = "radial", cost = 1e6, gamma = 10) - -# Predictions -nn_pred_train <- predict(nn_model, train_set_preprocessed) -nn_pred_test <- predict(nn_model, test_set_preprocessed) - -svm_pred_train <- predict(svm_model, train_set_preprocessed) -svm_pred_test <- predict(svm_model, test_set_preprocessed) - -# Calculate Mean Squared Errors -nn_train_error <- mean((nn_pred_train - train_set_preprocessed$medv)^2) -nn_test_error <- mean((nn_pred_test - test_set_preprocessed$medv)^2) - -svm_train_error <- mean((svm_pred_train - train_set_preprocessed$medv)^2) -svm_test_error <- mean((svm_pred_test - test_set_preprocessed$medv)^2) - -# Create a 2x2 comparison table with rounded results -results <- matrix(round(c(nn_train_error, nn_test_error, svm_train_error, svm_test_error), 2), nrow = 2, byrow = TRUE) -colnames(results) <- c("Neural Network", "SVM") -rownames(results) <- c("Training Error", "Test Error") - - -# Convert matrix to LaTeX table -latex_table <- xtable(results) - -# Print the LaTeX table -print(latex_table, include.rownames = TRUE, include.colnames = TRUE, comment = FALSE) - diff --git a/slides/regularization/rsrc/table_overfitting.R b/slides/regularization/rsrc/table_overfitting.R new file mode 100755 index 00000000..3f118d9a --- /dev/null +++ b/slides/regularization/rsrc/table_overfitting.R @@ -0,0 +1,52 @@ +# ------------------------------------------------------------------------------ +# intro +# TABLE: +# train and test MSE table using Neural Network and CART (overfitting). +# DATA: mtcars +# ------------------------------------------------------------------------------ + +library(nnet) +library(xtable) +library(mlr3) +library(mlr3learners) +set.seed(123) + +# DATA ------------------------------------------------------------------------- + +lgr::get_logger("mlr3")$set_threshold("info") + +task = tsk("mtcars") + +lrn1 = lrn("regr.nnet", size = 100, maxit = 20000, MaxNWts = 10000, decay = 0, abstol = 1e-7) +lrn1$encapsulate = c(train = "evaluate", predict = "evaluate") +lrn2 = lrn("regr.rpart", minsplit = 2, cp = 0) + +my_learners = list(lrn1, lrn2) +for (x in my_learners){ + x$predict_sets = c("train", "test") +} + +bg = benchmark_grid(task, my_learners, rsmp("cv", folds = 10)) + +bmr = benchmark(bg) + +m1 = msr("regr.mse", predict_sets = c("test"), id = "mse-test") +m2 = msr("regr.mse", predict_sets = c("train"), id = "mse-train") + +a = bmr$aggregate(measures = list(m1, m2)) +print(a) + +# TABLE ------------------------------------------------------------------------ + +# Create a 2x2 comparison table with rounded results +res = as.data.frame(a) +res = res[, c("mse-train", "mse-test")] + +rownames(res) = c("Neural Network", "CART") +colnames(res) = c("Train MSE", "Test MSE") + +latex_tab = xtable(res) + +print(latex_tab, file = "table_overfitting.tex", include.rownames = TRUE, include.colnames = TRUE, comment = FALSE) + + diff --git a/slides/regularization/rsrc/table_overfitting.tex b/slides/regularization/rsrc/table_overfitting.tex new file mode 100755 index 00000000..c7dd338f --- /dev/null +++ b/slides/regularization/rsrc/table_overfitting.tex @@ -0,0 +1,11 @@ +\begin{table}[ht] +\centering +\begin{tabular}{rrr} + \hline + & Train MSE & Test MSE \\ + \hline +Neural Network & 1.47 & 345.84 \\ + CART & 0.00 & 6.91 \\ + \hline +\end{tabular} +\end{table} diff --git a/slides/regularization/slides-regu-intro.tex b/slides/regularization/slides-regu-intro.tex index f30328e2..a2e6ca7a 100644 --- a/slides/regularization/slides-regu-intro.tex +++ b/slides/regularization/slides-regu-intro.tex @@ -92,17 +92,7 @@ \lz \lz -\begin{table}[ht] -\centering -\begin{tabular}{rrr} - \hline - & Train MSE & Test MSE \\ - \hline -Neural Network & 3.68 & 19.98 \\ - CART & 0.00 & 10.21 \\ - \hline -\end{tabular} -\end{table} +\input{rsrc/table_overfitting.tex} \lz \lz