diff --git a/slides/regularization/figure/enet_tradeoff.png b/slides/regularization/figure/enet_tradeoff.png index 5896575d..a2ef3cfa 100644 Binary files a/slides/regularization/figure/enet_tradeoff.png and b/slides/regularization/figure/enet_tradeoff.png differ diff --git a/slides/regularization/figure/l1_l2_regupaths_mse.png b/slides/regularization/figure/l1_l2_regupaths_mse.png new file mode 100644 index 00000000..1091559c Binary files /dev/null and b/slides/regularization/figure/l1_l2_regupaths_mse.png differ diff --git a/slides/regularization/figure/lq_penalty.png b/slides/regularization/figure/lq_penalty.png new file mode 100644 index 00000000..fd74a406 Binary files /dev/null and b/slides/regularization/figure/lq_penalty.png differ diff --git a/slides/regularization/figure/reg_logreg.png b/slides/regularization/figure/reg_logreg.png index cbeacfe4..867188f2 100644 Binary files a/slides/regularization/figure/reg_logreg.png and b/slides/regularization/figure/reg_logreg.png differ diff --git a/slides/regularization/rsrc/enet_tradeoff.R b/slides/regularization/rsrc/enet_tradeoff.R new file mode 100644 index 00000000..d39c2153 --- /dev/null +++ b/slides/regularization/rsrc/enet_tradeoff.R @@ -0,0 +1,80 @@ +# ------------------------------------------------------------------------------ +# enetlogreg + +# FIG: boxplot and violinplot of coefficients for elasticnet, lasso and ridge. +# only show first ten coefficients. 
+# LEFT: linear model with 5 non-Zero coefficients (sparse) +# RIGHT: linear model with 500 non-Zero coefficients +# ------------------------------------------------------------------------------ + +library(ggplot2) +library(gridExtra) +library(dplyr) +library(tidyr) + +theme_set(theme_minimal()) + +# DATA ------------------------------------------------------------------------- + +load("enet_exp.RData") + +# PLOT ------------------------------------------------------------------------- +q_values <- sapply(ba$task_id, function(task) { + as.numeric(sub("q:(\\d+)", "\\1", task)) +}) + +performance_df <- as.data.frame(ba) +performance_df$q <- q_values +performance_df$learner_id <- as.factor(gsub("\\.tuned", "", performance_df$learner_id)) + +# linear model with sparse features +df_5 <- performance_df[performance_df['q']==5,] +df_5 <- df_5 %>% select(learner_id, betas) + +df_5 <- df_5 %>% + unnest_longer(betas) + +beta_sam <- performance_df[1,'betas'][[1]] +index_column <- rep(1:length(beta_sam), length.out = nrow(df_5)) +df_5$index <- index_column + +df_5 <- df_5 %>% + filter(index <= 10) + +p1 <- ggplot(data = df_5, aes(x=as.numeric(betas), y = as.numeric(index), group=as.numeric(index)))+ + geom_violin(position = "dodge", trim = FALSE, scale = "width") + + geom_boxplot(width = 0.4, color = "gray50", alpha = 0.5) + + coord_flip()+ + labs(title="sparse") + + facet_grid(learner_id~.)+ + xlab("value") + + ylab(expression('index of'~betas)) + + scale_y_continuous(breaks=1:10) + +# linear model with non-sparse features +df_500 <- performance_df[performance_df['q']==500,] +df_500 <- df_500 %>% select(learner_id, betas) + +df_500 <- df_500 %>% + unnest_longer(betas) + +beta_sam <- performance_df[1,'betas'][[1]] +index_column <- rep(1:length(beta_sam), length.out = nrow(df_500)) +df_500$index <- index_column + +df_500 <- df_500 %>% + filter(index <= 10) + +p2 <- ggplot(data = df_500, aes(x=as.numeric(betas), y = as.numeric(index), group=as.numeric(index)))+ + 
geom_violin(position = "dodge", trim = FALSE, scale = "width") + + geom_boxplot(width = 0.4, color = "gray50", alpha = 0.5) + + coord_flip()+ + facet_grid(learner_id~.) + + labs(title="non-sparse") + + xlab("value") + + ylab(expression('index of'~betas)) + + scale_y_continuous(breaks=1:10) + +p <- grid.arrange(p1, p2, nrow=1) + +ggsave("../figure/enet_tradeoff.png", plot = p, width = 8, height = 4.5) diff --git a/slides/regularization/rsrc/l1_l2_regupaths_mse.R b/slides/regularization/rsrc/l1_l2_regupaths_mse.R index d14acf01..f435d8bc 100644 --- a/slides/regularization/rsrc/l1_l2_regupaths_mse.R +++ b/slides/regularization/rsrc/l1_l2_regupaths_mse.R @@ -1,42 +1,89 @@ -library(glmnet) -library(datasets) - -set.seed(42) - -# Load mtcars -data(mtcars) -x <- as.matrix(mtcars[, -1]) -y <- mtcars$mpg - -# Normalize data -x <- scale(x, center = TRUE, scale = TRUE) -y <- scale(y, center = TRUE, scale = FALSE) - -# Lasso with cross-validation -cvfit_lasso <- cv.glmnet(x, y, alpha = 1) - -# Ridge with cross-validation -cvfit_ridge <- cv.glmnet(x, y, alpha = 0) - -# Plot layout -par(mfrow = c(2, 2), oma = c(0, 0, 0, 0), mar = c(5, 4, 4, 2) + 0.1) -par(cex.main = 1.7, cex.axis = 1.5, cex.lab = 1.5, lwd=1.2) - -# Plot regularization path for Lasso -plot(cvfit_lasso$glmnet.fit, xvar = "lambda", label = TRUE, main = "") -title(main = "lasso coefficients path", line = 2.5) - -# Plot regularization path for Ridge -plot(cvfit_ridge$glmnet.fit, xvar = "lambda", label = TRUE, main = "") -title(main = "ridge coefficients path", line = 2.5) - -# Plot RMSE vs. Lambda for Lasso -plot(cvfit_lasso, main = "") -title(main = "MSE vs. lambda for lasso", line = 2.5) - -# Plot RMSE vs. Lambda for Ridge -plot(cvfit_ridge, main = "") -title(main = "MSE vs. 
lambda for ridge", line = 2.5) - -# Reset layout to default -par(mfrow = c(1, 1), oma = c(0, 0, 0, 0), mar = c(5, 4, 4, 2) + 0.1) +# ------------------------------------------------------------------------------ +# l1 vs l2 + +# FIG: +# (1) how each coefficient changes with log(lambda) under l1 and l2 regularization +# (2) how MSE changes with log(lambda) under l1 and l2 regularization +# DATA: mtcars +# ------------------------------------------------------------------------------ +library(glmnet) +library(datasets) +library(tidyr) +library(dplyr) +library(ggplot2) +library(gridExtra) + +set.seed(42) + +# DATA ------------------------------------------------------------------------- + +data(mtcars) +x <- as.matrix(mtcars[, -1]) +y <- mtcars$mpg + +# Normalize data +x <- scale(x, center = TRUE, scale = TRUE) +y <- scale(y, center = TRUE, scale = FALSE) + +# Lasso with cross-validation +cvfit_lasso <- cv.glmnet(x, y, alpha = 1) + +# Ridge with cross-validation +cvfit_ridge <- cv.glmnet(x, y, alpha = 0) + +# Extracting data for Lasso +lasso_coefs <- as.data.frame(t(as.matrix(cvfit_lasso$glmnet.fit$beta))) +lasso_coefs$lambda <- cvfit_lasso$glmnet.fit$lambda +lasso_coefs <- lasso_coefs %>% pivot_longer(-lambda, names_to = "Variable", values_to = "Coefficient") + +# Extracting data for Ridge +ridge_coefs <- as.data.frame(t(as.matrix(cvfit_ridge$glmnet.fit$beta))) +ridge_coefs$lambda <- cvfit_ridge$glmnet.fit$lambda +ridge_coefs <- ridge_coefs %>% pivot_longer(-lambda, names_to = "Variable", values_to = "Coefficient") + +# Cross-validated MSE data (mean CV error and its standard error per lambda) +lasso_rmse <- data.frame(lambda = cvfit_lasso$lambda, MSE = cvfit_lasso$cvm, SE = cvfit_lasso$cvsd) +ridge_rmse <- data.frame(lambda = cvfit_ridge$lambda, MSE = cvfit_ridge$cvm, SE = cvfit_ridge$cvsd) + +# PLOT ------------------------------------------------------------------------- + +# Lasso coefficients path plot +p1 <- ggplot(lasso_coefs, aes(x = log(lambda), y = Coefficient, color = Variable)) + + geom_line() + + labs(title = "Lasso 
coefficients path", x = expression(log~lambda), y = "Coefficients") + + theme_minimal() + +# Ridge coefficients path plot +p2 <- ggplot(ridge_coefs, aes(x = log(lambda), y = Coefficient, color = Variable)) + + geom_line() + + labs(title = "Ridge coefficients path", x = expression(log~lambda), y = "Coefficients") + + theme_minimal() + +# Lasso MSE vs. lambda plot +p3 <- ggplot(lasso_rmse, aes(x = log(lambda), y = MSE)) + + geom_errorbar(aes(ymin = MSE - SE, ymax = MSE + SE), width = 0.1, color = "gray") + + geom_point(color = "red", size=0.3) + + geom_line(color = "red") + + geom_vline(xintercept = log(cvfit_lasso$lambda.min), linetype = "dashed", color = "gray", linewidth = 0.6) + + geom_vline(xintercept = log(cvfit_lasso$lambda.1se), linetype = "dashed", color = "gray", linewidth = 0.6) + + labs(title = expression(MSE ~ vs. ~ lambda ~ "for lasso"), + x = expression(Log~lambda), + y = "MSE") + + theme_minimal() + +# Ridge MSE vs. lambda plot +p4 <- ggplot(ridge_rmse, aes(x = log(lambda), y = MSE)) + + geom_errorbar(aes(ymin = MSE - SE, ymax = MSE + SE), width = 0.1, color = "gray") + + geom_point(color = "red", size=0.3) + + geom_line(color = "red") + + geom_vline(xintercept = log(cvfit_ridge$lambda.min), linetype = "dashed", color = "gray", linewidth = 0.6) + + geom_vline(xintercept = log(cvfit_ridge$lambda.1se), linetype = "dashed", color = "gray", linewidth = 0.6) + + labs(title = expression(MSE ~ vs. 
~ lambda ~ "for ridge"), + x = expression(Log~lambda), + y = "MSE") + + theme_minimal() + +# Arrange plots in a 2x2 grid +p <- grid.arrange(p1, p2, p3, p4, nrow = 2) + +ggsave("../figure/l1_l2_regupaths_mse.png", plot = p, width = 7, height = 4) diff --git a/slides/regularization/rsrc/reg_logreg.R b/slides/regularization/rsrc/reg_logreg.R new file mode 100644 index 00000000..5e09150d --- /dev/null +++ b/slides/regularization/rsrc/reg_logreg.R @@ -0,0 +1,105 @@ +# ------------------------------------------------------------------------------ +# enetlogreg + +# FIG: binary classification task visualization with different values of lambda +# ------------------------------------------------------------------------------ + +library(mlr3) +library(mlr3learners) +library(mlr3pipelines) +library(ggplot2) +library(gridExtra) +library(viridis) +library(LiblineaR) + +theme_set(theme_minimal()) + +set.seed(314159) + +# DATA ------------------------------------------------------------------------- + +# Create feature map by taking polynomial feature map for x1 and x2: +polyDf <- function (mydf, y = NULL, degree = 7) +{ + + if (! is.null(y[1])) { + out <- data.frame( + y = mydf$y, + poly(mydf$x1, degree = degree, raw = TRUE), + poly(mydf$x2, degree = degree, raw = TRUE) + ) + + names(out) <- c("y", paste0("X1.", 1:degree), paste0("X2.", 1:degree)) + } else { + out <- data.frame( + poly(mydf$x1, degree = degree, raw = TRUE), + poly(mydf$x2, degree = degree, raw = TRUE) + ) + + names(out) <- c(paste0("X1.", 1:degree), paste0("X2.", 1:degree)) + } + return (out) +} + + +n <- 100 + +# Simulate data frame, y is chosen by grouping according to the Euclidean norm. 
+# This leads to a structure which isn't separable by linear hyperplanes: +mydf <- data.frame( + x1 = runif(n, -1, 1), + x2 = runif(n, -1, 1) +) + +y <- ifelse( + apply(mydf, 1, function (x) { return (sqrt(sum(x^2))) }) + rnorm(n, 0, 0.2) < 0.6, 0, 1 +) + +mydf$y <- as.factor(ifelse(y == 0, "Group1", "Group2")) + +# Create the new data frame with feature map: +mydf.poly <- polyDf(mydf, mydf$y) + +# PLOT ------------------------------------------------------------------------- + +# visualize result of classification task +plotRegLogReg <- function(lambda) { + task <- TaskClassif$new(id = "poly_task", backend = mydf.poly, target = "y") + if (lambda != 0){ + lrn <- lrn("classif.glmnet", alpha = 0, lambda = lambda) + }else{ + lrn <- lrn("classif.log_reg") + } + model <- lrn$train(task) + + test <- expand.grid(x1 = seq(-1, 1, 0.05), x2 = seq(-1, 1, 0.03)) + poly.test <- polyDf(test) + + test$Group <- model$predict_newdata(newdata = poly.test)$response + + gg <- ggplot(test, aes(x = x1, y = x2, color = Group)) + + geom_point(alpha = 0.3, stroke = 0, shape = 15) + + geom_point(data = mydf, aes(x = x1, y = x2, color = y)) + + ggtitle(bquote(lambda == .(as.character(lambda)))) + + scale_color_viridis(end = 0.9, discrete = TRUE) + + return (gg) +} + +gg1 <- plotRegLogReg(0) + theme(legend.position="none") +gg2 <- plotRegLogReg(0.0001) + theme(legend.position="none") +gg3 <- plotRegLogReg(1) + +g_legend <- function (a.gplot) { + tmp <- ggplot_gtable(ggplot_build(a.gplot)) + leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box") + legend <- tmp$grobs[[leg]] + return(legend) +} + +mylegend <- g_legend(gg3) + +p <- grid.arrange(arrangeGrob(gg1, gg2, gg3 + theme(legend.position = "none"), ncol = 3), + mylegend, layout_matrix = matrix(c(1,1,1,2), nrow = 1)) + +ggsave("../figure/reg_logreg.png", plot = p, width = 8, height = 2.5) diff --git a/slides/regularization/slides-regu-l1vsl2.tex b/slides/regularization/slides-regu-l1vsl2.tex index e409b5d8..30820d6a 100644 
--- a/slides/regularization/slides-regu-l1vsl2.tex +++ b/slides/regularization/slides-regu-l1vsl2.tex @@ -45,7 +45,7 @@ We see how only lasso shrinks to exactly 0. \begin{figure} -\includegraphics[width=0.8\textwidth]{figure_man/l1_l2_regupaths_mse.pdf}\\ +\includegraphics[width=0.8\textwidth]{figure/l1_l2_regupaths_mse.png}\\ \end{figure} \vspace{-0.3cm} % Coef paths and cross-val. MSE for $\lambda$ values for ridge and lasso.\\ diff --git a/slides/regularization/slides-regu-others.tex b/slides/regularization/slides-regu-others.tex index cf65326b..7f4210be 100644 --- a/slides/regularization/slides-regu-others.tex +++ b/slides/regularization/slides-regu-others.tex @@ -61,7 +61,7 @@ \vspace{-0.3cm} \begin{figure} \centering -\scalebox{0.9}{\includegraphics{figure_man/lq-penalty-plots.png}} +\scalebox{0.9}{\includegraphics{figure/lq_penalty.png}} \end{figure}