
Commit

OL pre-meeting updates
ludwigbothmann committed Sep 9, 2024
1 parent d3e5627 commit 94382b6
Showing 8 changed files with 186 additions and 183 deletions.
3 changes: 3 additions & 0 deletions slides/advriskmin/chapter-order.tex
@@ -41,6 +41,9 @@ \subsection{Bernoulli Loss}
\subsection{Logistic Regression (Deep-Dive)}
\includepdf[pages=-]{../../slides-pdf/slides-advriskmin-logreg-deepdive.pdf}

\subsection{Proper Scoring Rules}
\includepdf[pages=-]{../../slides-pdf/slides-advriskmin-proper-scoring-rules.pdf}

\subsection{Brier Score}
\includepdf[pages=-]{../../slides-pdf/slides-advriskmin-classification-brier.pdf}

@@ -256,6 +256,7 @@

\begin{vbframe}{Approximation and Estimation \citelink{BROWN2024BIAS} }
The Bias-Variance decomp is often confused or equated with the related (but different) decomp of \textbf{excess risk} into \textbf{estimation} and \textbf{approximation} error.
\vspace{-0.3cm}

\begin{eqnarray*}
\underbrace{\risk(\hat f_{\Hspace}) - \risk(\fbayes_{\Hspace_{all}})}_{\text{excess risk}} &=& \underbrace{\risk(\hat f_{\Hspace}) - \risk(\fbayes_{\Hspace})}_{\text{estimation error}} + \underbrace{\risk(\fbayes_{\Hspace}) - \risk(\fbayes_{\Hspace_{all}})}_{\text{approx. error}}
@@ -265,14 +266,15 @@
\vspace{-0.1cm}
\begin{figure}
\centering
\includegraphics[width = 0.78\textwidth]{figure_man/biasvar-vs-estapprox-tradeoff.png}
\includegraphics[width = 0.7\textwidth]{figure_man/biasvar-vs-estapprox-tradeoff.png}
\tiny{\\ Credit: \cite{BROWN2024BIAS}}
\end{figure}

{\footnotesize \textbf{NB}: The bias-variance decomp. only holds for certain losses, while the above decomposition is universal.}

+end{vbframe}
\end{vbframe}
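% Hedged editorial sketch (not from the original deck), assuming squared loss and the notation above:
% a toy case that separates the two terms of the excess risk.
\begin{vbframe}{Toy example: approximation vs. estimation error}
Suppose $y = x^2 + \epsilon$ with $x \sim U[0,1]$, so $\fbayes_{\Hspace_{all}}(x) = x^2$, and let $\Hspace$ be the space of linear models.
\begin{itemize}
\item \textbf{Approximation error} $\risk(\fbayes_{\Hspace}) - \risk(\fbayes_{\Hspace_{all}}) > 0$ is a fixed property of $\Hspace$: no linear model matches $x^2$, and the gap shrinks only if $\Hspace$ is enlarged.
\item \textbf{Estimation error} $\risk(\hat f_{\Hspace}) - \risk(\fbayes_{\Hspace})$ is random, since $\hat f_{\Hspace}$ depends on the sample; it typically shrinks with $n$ and grows with the capacity of $\Hspace$.
\end{itemize}
\end{vbframe}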

\framebreak
%\framebreak

\begin{vbframe}{Approx./Estimation Error \citelink{BROWN2024BIAS}}

Binary file modified slides/regularization/figure/enet_lasso_ridge_r2.png
Binary file modified slides/regularization/figure/enet_tradeoff.png
226 changes: 113 additions & 113 deletions slides/regularization/rsrc/enet_exp.R
@@ -1,113 +1,113 @@
# ------------------------------------------------------------------------------
# enetlogreg

# DATA: generate regression data y = X(n*q ~Normal)*beta(q=500) + eps(n ~Normal)
# (1) beta is sparse, only 5 is non-zero out of 500
# (2) beta is non-sparse
# then calculate R-squared with enet, lasso and ridge regression
# ------------------------------------------------------------------------------

# ------------------------------------------------------------------------------
# enetlogreg

# DATA: generate regression data y = X(n*p ~Normal)*theta(p=500) + eps(n ~Normal)
# (1) theta is sparse: only 5 of the 500 coefficients are non-zero
# (2) theta is dense: all 500 coefficients are non-zero
# then calculate R-squared with enet, lasso and ridge regression
# ------------------------------------------------------------------------------

library(mlr3)
library(glmnet)
library(mlr3learners)
library(mlr3tuning)
library(mlr3misc)
library(pracma)
library(mvtnorm)
library(future)

set.seed(123)

# DATA -------------------------------------------------------------------------

n_train = 100
n_test = 10000
n = n_train + n_test
n_reps = 20
n_folds = 5
gs1_grid = 30
gs2_grid = c(10, 20)
p = 500
q_seq = c(5, 500)
x_corr = 0.8

# Initialize grid search tuners
tuner1 = tnr("grid_search", resolution = gs1_grid) # Tuner for lambda only
tuner2 = tnr("grid_search", # Tuner for both alpha and lambda
param_resolutions = c(alpha = gs2_grid[1], lambda = gs2_grid[2])
)

inner = rsmp("cv", folds = n_folds)
mm = msr("regr.mse")
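# note: the inner CV tunes on MSE; the outer benchmark is later aggregated with R-squared (regr.rsq)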

l1 = lrn("regr.glmnet", alpha = 0, id = "ridge")
l2 = lrn("regr.glmnet", alpha = 1, id = "lasso")
l3 = lrn("regr.glmnet", id = "enet")

ss1 = ps(
lambda = p_dbl(1e-3, 1e2, logscale = TRUE) # Log-scaled lambda search space
)

l1 = auto_tuner(tuner1, l1, inner, mm, search_space = ss1)
l2 = auto_tuner(tuner1, l2, inner, mm, search_space = ss1)

ss2 = ps(
alpha = p_dbl(0, 1),
lambda = p_dbl(1e-3, 1e2, logscale = TRUE)
) # Search space

l3 = auto_tuner(tuner2, l3, inner, mm, search_space = ss2)

mylearners = list(l1, l2, l3)

myrsmp = rsmp("holdout", ratio = n_train / n)
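# single holdout split per task: ratio n_train / n keeps ~100 observations for training, ~10000 for testing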
# lrn_order = c("LM", "ridge", "lasso")

# FUNC -------------------------------------------------------------------------

# Simulate data based on the given parameters and return regression task
make_simul_data = function(rep_i, q) {
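# AR(1)-style correlation: Sigma[i, j] = x_corr^|i - j| (symmetric Toeplitz), so neighboring features are strongly correlated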
sigma = x_corr^(0:(p-1))
sigma = Toeplitz(sigma)
X = rmvnorm(n = n, sigma = sigma)
eps = rnorm(n = n, sd = 0.1)
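# first q coefficients are 1, the remaining p - q are 0: sparse signal for q = 5, dense for q = 500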
theta = c(rep(1, q), rep(0, p-q))
y = X %*% theta + eps
d = as.data.frame(X)
colnames(d) = sprintf("x%03i", 1:p)
d$y = y
tt = as_task_regr(d, target = "y", id = sprintf("q:%i", q))
return(tt)
}

# Function to run benchmarking
run_bm = function(n_reps) {
simul_grid = expand.grid(q = q_seq, rep_i = 1:n_reps)
mytasks = lapply(1:nrow(simul_grid), function(i) {
row = simul_grid[i,]
make_simul_data(rep_i = row$rep_i, q = row$q)
})
bg = benchmark_grid(mytasks, mylearners, myrsmp)
bmr = benchmark(bg, store_models = TRUE)
ba = bmr$aggregate(msr("regr.rsq"))
list(bmr = bmr, ba = ba) # detailed and aggregated benchmark result
}

# DATA -------------------------------------------------------------------------

# Execute benchmarking in parallel using multiple cores
plan("multicore")
z = run_bm(n_reps)
ba = z$ba
bmr = z$bmr

# Extract and save model coefficients (betas)
nn = length(bmr$uhashes)
betas = lapply(1:nn, function(i){
at = bmr$resample_results$resample_result[[i]]$learners[[1]]
gmod = at$learner$model
as.numeric(gmod$beta)
})
ba$betas = betas
ba$resample_result = NULL
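# note: save() stores the object under its own symbol ("ba"), not under the tag "bmr_aggr",
# so the companion plotting script reloads it via load("enet_exp.RData") as "ba"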
save(file = "enet_exp.RData", bmr_aggr = ba)
106 changes: 53 additions & 53 deletions slides/regularization/rsrc/enet_lasso_ridge_r2.R
@@ -1,53 +1,53 @@
# ------------------------------------------------------------------------------
# enetlogreg

# FIG: boxplot of R-squared for elasticnet, lasso and ridge
# LEFT: linear model with 5 non-Zero coefficients (sparse)
# RIGHT: linear model with 500 non-Zero coefficients
# ------------------------------------------------------------------------------

library(ggplot2)
library(gridExtra)
load("enet_exp.RData")

# PLOT -------------------------------------------------------------------------

q_values <- sapply(ba$task_id, function(task) {
as.numeric(sub("q:(\\d+)", "\\1", task))
})

performance_df <- as.data.frame(ba)
performance_df$q <- q_values
performance_df$learner_id <- as.factor(gsub("\\.tuned", "", performance_df$learner_id))

# linear model with sparse features
df_5 <- performance_df[performance_df['q']==5,]

p1 <- ggplot(data = df_5, aes(x = regr.rsq, y = learner_id)) +
geom_boxplot() +
coord_flip() +
ylab("") +
labs(title="sparse") +
xlab("R-squared")+
xlim(0.5,1)+
theme_minimal(base_size = 10) +
theme(legend.position="none",
axis.title.x=element_blank())

# linear model with non-sparse features
df_500 <- performance_df[performance_df['q']==500,]

p2 <- ggplot(data = df_500, aes(x = regr.rsq, y = learner_id)) +
geom_boxplot() +
coord_flip() +
ylab("") +
xlab("R-squared")+
labs(title="non-sparse") +
xlim(0.5,1)+
theme_minimal(base_size = 10) +
theme(legend.position="none",
axis.title.x=element_blank())

p <- grid.arrange(p1, p2, nrow= 1)

ggsave("../figure/enet_lasso_ridge_r2.png", plot = p, width = 6, height = 2)
# ------------------------------------------------------------------------------
# enetlogreg

# FIG: boxplot of R-squared for elasticnet, lasso and ridge
# LEFT: linear model with 5 non-zero coefficients (sparse)
# RIGHT: linear model with 500 non-zero coefficients (dense)
# ------------------------------------------------------------------------------

library(ggplot2)
library(gridExtra)
load("enet_exp.RData")

# PLOT -------------------------------------------------------------------------
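# q (number of non-zero coefficients) is recovered from each task id of the form "q:<value>"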

q_values <- sapply(ba$task_id, function(task) {
as.numeric(sub("q:(\\d+)", "\\1", task))
})

performance_df <- as.data.frame(ba)
performance_df$q <- q_values
performance_df$learner_id <- as.factor(gsub("\\.tuned", "", performance_df$learner_id))

# linear model with sparse features
df_5 <- performance_df[performance_df['q']==5,]

p1 <- ggplot(data = df_5, aes(x = regr.rsq, y = learner_id)) +
geom_boxplot() +
coord_flip() +
ylab("") +
labs(title="sparse") +
xlab("R-squared")+
xlim(0.95,1)+
theme_minimal(base_size = 10) +
theme(legend.position="none",
axis.title.x=element_blank())

# linear model with dense features
df_500 <- performance_df[performance_df['q']==500,]

p2 <- ggplot(data = df_500, aes(x = regr.rsq, y = learner_id)) +
geom_boxplot() +
coord_flip() +
ylab("") +
xlab("R-squared")+
labs(title="dense") +
xlim(0.5,1)+
theme_minimal(base_size = 10) +
theme(legend.position="none",
axis.title.x=element_blank())

p <- grid.arrange(p1, p2, nrow= 1)

ggsave("../figure/enet_lasso_ridge_r2.png", plot = p, width = 6, height = 2)
8 changes: 4 additions & 4 deletions slides/regularization/rsrc/enet_tradeoff.R
@@ -48,10 +48,10 @@ p1 <- ggplot(data = df_5, aes(x=as.numeric(betas), y = as.numeric(index), group=
labs(title="sparse") +
facet_grid(learner_id~.)+
xlab("value") +
ylab(expression('index of'~betas)) +
ylab(expression('index of'~theta)) +
scale_y_continuous(breaks=1:10)

# linear model with non-sparse features
# linear model with dense features
df_500 <- performance_df[performance_df['q']==500,]
df_500 <- df_500 %>% select(learner_id, betas)

@@ -70,9 +70,9 @@ p2 <- ggplot(data = df_500, aes(x=as.numeric(betas), y = as.numeric(index), grou
geom_boxplot(width = 0.4, color = "gray50", alpha = 0.5) +
coord_flip()+
facet_grid(learner_id~.) +
labs(title="non-sparse") +
labs(title="dense") +
xlab("value") +
ylab(expression('index of'~betas)) +
ylab(expression('index of'~theta)) +
scale_y_continuous(breaks=1:10)

p <- grid.arrange(p1, p2, nrow=1)

0 comments on commit 94382b6
