
Commit

OL pre-meeting updates
ludwigbothmann committed Sep 9, 2024
1 parent d3e5627 commit 94382b6
Showing 8 changed files with 186 additions and 183 deletions.
3 changes: 3 additions & 0 deletions slides/advriskmin/chapter-order.tex
@@ -41,6 +41,9 @@ \subsection{Bernoulli Loss}
\subsection{Logistic Regression (Deep-Dive)}
\includepdf[pages=-]{../../slides-pdf/slides-advriskmin-logreg-deepdive.pdf}

\subsection{Proper Scoring Rules}
\includepdf[pages=-]{../../slides-pdf/slides-advriskmin-proper-scoring-rules.pdf}

\subsection{Brier Score}
\includepdf[pages=-]{../../slides-pdf/slides-advriskmin-classification-brier.pdf}

@@ -256,6 +256,7 @@

\begin{vbframe}{Approximation and Estimation \citelink{BROWN2024BIAS} }
The Bias-Variance decomp is often confused or equated with the related (but different) decomp of \textbf{excess risk} into \textbf{estimation} and \textbf{approximation} error.
\vspace{-0.3cm}

\begin{eqnarray*}
\underbrace{\risk(\hat f_{\Hspace}) - \risk(\fbayes_{\Hspace_{all}})}_{\text{excess risk}} &=& \underbrace{\risk(\hat f_{\Hspace}) - \risk(\fbayes_{\Hspace})}_{\text{estimation error}} + \underbrace{\risk(\fbayes_{\Hspace}) - \risk(\fbayes_{\Hspace_{all}})}_{\text{approx. error}}
@@ -265,14 +266,15 @@
\vspace{-0.1cm}
\begin{figure}
\centering
\includegraphics[width = 0.78\textwidth]{figure_man/biasvar-vs-estapprox-tradeoff.png}
\includegraphics[width = 0.7\textwidth]{figure_man/biasvar-vs-estapprox-tradeoff.png}
\tiny{\\ Credit: \cite{BROWN2024BIAS}}
\end{figure}

{\footnotesize \textbf{NB}: The bias-variance decomp. only holds for certain losses, while the above decomposition is universal.}

+end{vbframe}
\end{vbframe}
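% Hedged editorial sketch (not from the original deck), assuming squared loss and the notation above:
% a toy case that separates the two terms of the excess risk.
\begin{vbframe}{Toy example: approximation vs. estimation error}
Suppose $y = x^2 + \epsilon$ with $x \sim U[0,1]$, so $\fbayes_{\Hspace_{all}}(x) = x^2$, and let $\Hspace$ be the space of linear models.
\begin{itemize}
\item \textbf{Approximation error} $\risk(\fbayes_{\Hspace}) - \risk(\fbayes_{\Hspace_{all}}) > 0$ is a fixed property of $\Hspace$: no linear model matches $x^2$, and the gap shrinks only if $\Hspace$ is enlarged.
\item \textbf{Estimation error} $\risk(\hat f_{\Hspace}) - \risk(\fbayes_{\Hspace})$ is random, since $\hat f_{\Hspace}$ depends on the sample; it typically shrinks with $n$ and grows with the capacity of $\Hspace$.
\end{itemize}
\end{vbframe}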

\framebreak
%\framebreak

\begin{vbframe}{Approx./Estimation Error \citelink{BROWN2024BIAS}}

Binary file modified slides/regularization/figure/enet_lasso_ridge_r2.png
Binary file modified slides/regularization/figure/enet_tradeoff.png
226 changes: 113 additions & 113 deletions slides/regularization/rsrc/enet_exp.R
@@ -1,113 +1,113 @@
# ------------------------------------------------------------------------------
# enetlogreg

# DATA: generate regression data y = X(n*q ~Normal)*beta(q=500) + eps(n ~Normal)
# (1) beta is sparse, only 5 is non-zero out of 500
# (2) beta is non-sparse
# then calculate R-squared with enet, lasso and ridge regression
# ------------------------------------------------------------------------------

# ------------------------------------------------------------------------------
# enetlogreg

# DATA: generate regression data y = X(n*p ~Normal)*theta(p=500) + eps(n ~Normal)
# (1) theta is sparse: only 5 of the 500 coefficients are non-zero
# (2) theta is dense: all 500 coefficients are non-zero
# then calculate R-squared with enet, lasso and ridge regression
# ------------------------------------------------------------------------------

library(mlr3)
library(glmnet)
library(mlr3learners)
library(mlr3tuning)
library(mlr3misc)
library(pracma)
library(mvtnorm)
library(future)

set.seed(123)

# DATA -------------------------------------------------------------------------

n_train = 100
n_test = 10000
n = n_train + n_test
n_reps = 20
n_folds = 5
gs1_grid = 30
gs2_grid = c(10, 20)
p = 500
q_seq = c(5, 500)
x_corr = 0.8

# Initialize grid search tuners
tuner1 = tnr("grid_search", resolution = gs1_grid) # Tuner for lambda only
tuner2 = tnr("grid_search", # Tuner for both alpha and lambda
param_resolutions = c(alpha = gs2_grid[1], lambda = gs2_grid[2])
)

inner = rsmp("cv", folds = n_folds)
mm = msr("regr.mse")
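# note: the inner CV tunes on MSE; the outer benchmark is later aggregated with R-squared (regr.rsq)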

l1 = lrn("regr.glmnet", alpha = 0, id = "ridge")
l2 = lrn("regr.glmnet", alpha = 1, id = "lasso")
l3 = lrn("regr.glmnet", id = "enet")

ss1 = ps(
lambda = p_dbl(1e-3, 1e2, logscale = TRUE) # Log-scaled lambda search space
)

l1 = auto_tuner(tuner1, l1, inner, mm, search_space = ss1)
l2 = auto_tuner(tuner1, l2, inner, mm, search_space = ss1)

ss2 = ps(
alpha = p_dbl(0, 1),
lambda = p_dbl(1e-3, 1e2, logscale = TRUE)
) # Search space

l3 = auto_tuner(tuner2, l3, inner, mm, search_space = ss2)

mylearners = list(l1, l2, l3)

myrsmp = rsmp("holdout", ratio = n_train / n)
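# single holdout split per task: ratio n_train / n keeps ~100 observations for training, ~10000 for testing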
# lrn_order = c("LM", "ridge", "lasso")

# FUNC -------------------------------------------------------------------------

# Simulate data based on the given parameters and return regression task
make_simul_data = function(rep_i, q) {
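# AR(1)-style correlation: Sigma[i, j] = x_corr^|i - j| (symmetric Toeplitz), so neighboring features are strongly correlated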
sigma = x_corr^(0:(p-1))
sigma = Toeplitz(sigma)
X = rmvnorm(n = n, sigma = sigma)
eps = rnorm(n = n, sd = 0.1)
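# first q coefficients are 1, the remaining p - q are 0: sparse signal for q = 5, dense for q = 500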
theta = c(rep(1, q), rep(0, p-q))
y = X %*% theta + eps
d = as.data.frame(X)
colnames(d) = sprintf("x%03i", 1:p)
d$y = y
tt = as_task_regr(d, target = "y", id = sprintf("q:%i", q))
return(tt)
}

# Function to run benchmarking
run_bm = function(n_reps) {
simul_grid = expand.grid(q = q_seq, rep_i = 1:n_reps)
mytasks = lapply(1:nrow(simul_grid), function(i) {
row = simul_grid[i,]
make_simul_data(rep_i = row$rep_i, q = row$q)
})
bg = benchmark_grid(mytasks, mylearners, myrsmp)
bmr = benchmark(bg, store_models = TRUE)
ba = bmr$aggregate(msr("regr.rsq"))
list(bmr = bmr, ba = ba) # detailed and aggregated benchmark result
}

# DATA -------------------------------------------------------------------------

# Execute benchmarking in parallel using multiple cores
plan("multicore")
z = run_bm(n_reps)
ba = z$ba
bmr = z$bmr

# Extract and save model coefficients (betas)
nn = length(bmr$uhashes)
betas = lapply(1:nn, function(i){
at = bmr$resample_results$resample_result[[i]]$learners[[1]]
gmod = at$learner$model
as.numeric(gmod$beta)
})
ba$betas = betas
ba$resample_result = NULL
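# note: save() stores the object under its own symbol ("ba"), not under the tag "bmr_aggr",
# so the companion plotting script reloads it via load("enet_exp.RData") as "ba"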
save(file = "enet_exp.RData", bmr_aggr = ba)
106 changes: 53 additions & 53 deletions slides/regularization/rsrc/enet_lasso_ridge_r2.R
@@ -1,53 +1,53 @@
# ------------------------------------------------------------------------------
# enetlogreg

# FIG: boxplot of R-squared for elasticnet, lasso and ridge
# LEFT: linear model with 5 non-Zero coefficients (sparse)
# RIGHT: linear model with 500 non-Zero coefficients
# ------------------------------------------------------------------------------

library(ggplot2)
library(gridExtra)
load("enet_exp.RData")

# PLOT -------------------------------------------------------------------------

q_values <- sapply(ba$task_id, function(task) {
as.numeric(sub("q:(\\d+)", "\\1", task))
})

performance_df <- as.data.frame(ba)
performance_df$q <- q_values
performance_df$learner_id <- as.factor(gsub("\\.tuned", "", performance_df$learner_id))

# linear model with sparse features
df_5 <- performance_df[performance_df['q']==5,]

p1 <- ggplot(data = df_5, aes(x = regr.rsq, y = learner_id)) +
geom_boxplot() +
coord_flip() +
ylab("") +
labs(title="sparse") +
xlab("R-squared")+
xlim(0.5,1)+
theme_minimal(base_size = 10) +
theme(legend.position="none",
axis.title.x=element_blank())

# linear model with non-sparse features
df_500 <- performance_df[performance_df['q']==500,]

p2 <- ggplot(data = df_500, aes(x = regr.rsq, y = learner_id)) +
geom_boxplot() +
coord_flip() +
ylab("") +
xlab("R-squared")+
labs(title="non-sparse") +
xlim(0.5,1)+
theme_minimal(base_size = 10) +
theme(legend.position="none",
axis.title.x=element_blank())

p <- grid.arrange(p1, p2, nrow= 1)

ggsave("../figure/enet_lasso_ridge_r2.png", plot = p, width = 6, height = 2)
# ------------------------------------------------------------------------------
# enetlogreg

# FIG: boxplot of R-squared for elasticnet, lasso and ridge
# LEFT: linear model with 5 non-zero coefficients (sparse)
# RIGHT: linear model with 500 non-zero coefficients (dense)
# ------------------------------------------------------------------------------

library(ggplot2)
library(gridExtra)
load("enet_exp.RData")

# PLOT -------------------------------------------------------------------------
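# q (number of non-zero coefficients) is recovered from each task id of the form "q:<value>"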

q_values <- sapply(ba$task_id, function(task) {
as.numeric(sub("q:(\\d+)", "\\1", task))
})

performance_df <- as.data.frame(ba)
performance_df$q <- q_values
performance_df$learner_id <- as.factor(gsub("\\.tuned", "", performance_df$learner_id))

# linear model with sparse features
df_5 <- performance_df[performance_df['q']==5,]

p1 <- ggplot(data = df_5, aes(x = regr.rsq, y = learner_id)) +
geom_boxplot() +
coord_flip() +
ylab("") +
labs(title="sparse") +
xlab("R-squared")+
xlim(0.95,1)+
theme_minimal(base_size = 10) +
theme(legend.position="none",
axis.title.x=element_blank())

# linear model with dense features
df_500 <- performance_df[performance_df['q']==500,]

p2 <- ggplot(data = df_500, aes(x = regr.rsq, y = learner_id)) +
geom_boxplot() +
coord_flip() +
ylab("") +
xlab("R-squared")+
labs(title="dense") +
xlim(0.5,1)+
theme_minimal(base_size = 10) +
theme(legend.position="none",
axis.title.x=element_blank())

p <- grid.arrange(p1, p2, nrow= 1)

ggsave("../figure/enet_lasso_ridge_r2.png", plot = p, width = 6, height = 2)
8 changes: 4 additions & 4 deletions slides/regularization/rsrc/enet_tradeoff.R
@@ -48,10 +48,10 @@ p1 <- ggplot(data = df_5, aes(x=as.numeric(betas), y = as.numeric(index), group=
labs(title="sparse") +
facet_grid(learner_id~.)+
xlab("value") +
ylab(expression('index of'~betas)) +
ylab(expression('index of'~theta)) +
scale_y_continuous(breaks=1:10)

# linear model with non-sparse features
# linear model with dense features
df_500 <- performance_df[performance_df['q']==500,]
df_500 <- df_500 %>% select(learner_id, betas)

@@ -70,9 +70,9 @@ p2 <- ggplot(data = df_500, aes(x=as.numeric(betas), y = as.numeric(index), grou
geom_boxplot(width = 0.4, color = "gray50", alpha = 0.5) +
coord_flip()+
facet_grid(learner_id~.) +
labs(title="non-sparse") +
labs(title="dense") +
xlab("value") +
ylab(expression('index of'~betas)) +
ylab(expression('index of'~theta)) +
scale_y_continuous(breaks=1:10)

p <- grid.arrange(p1, p2, nrow=1)

0 comments on commit 94382b6
