From 8382b7f7fc992853673924d82cfa9113519c0381 Mon Sep 17 00:00:00 2001
From: ludwigbothmann <46222472+ludwigbothmann@users.noreply.github.com>
Date: Thu, 7 Dec 2023 11:59:01 +0100
Subject: [PATCH] Updates from Overleaf

---
 ...ive.tex => slides-regu-lasso-deepdive.tex} | 32 +++++++++----------
 1 file changed, 15 insertions(+), 17 deletions(-)
 rename slides/regularization/{slides-regu-softthresholding-lasso-deepdive.tex => slides-regu-lasso-deepdive.tex} (68%)

diff --git a/slides/regularization/slides-regu-softthresholding-lasso-deepdive.tex b/slides/regularization/slides-regu-lasso-deepdive.tex
similarity index 68%
rename from slides/regularization/slides-regu-softthresholding-lasso-deepdive.tex
rename to slides/regularization/slides-regu-lasso-deepdive.tex
index b8bed098..ce318643 100644
--- a/slides/regularization/slides-regu-softthresholding-lasso-deepdive.tex
+++ b/slides/regularization/slides-regu-lasso-deepdive.tex
@@ -21,23 +21,22 @@
 \begin{vbframe}{Soft-thresholding and L1 regularization}
 In the lecture, we wanted to solve
 \[
- \min_{\thetab} \mathcal{\tilde R}_{\text{reg}}(\thetab) = \min_{\thetab}\mathcal{R}_{\text{emp}}(\thetah) + \sum_j \left[ \frac{1}{2} H_{j,j} (\theta_j - \hat{\theta}_j)^2 \right] + \sum_j \lambda |\theta_j|.
+ \min_{\thetab} \mathcal{\tilde R}_{\text{reg}}(\thetab) = \min_{\thetab}\mathcal{R}_{\text{emp}}(\thetah) + \sum_j \left[ \frac{1}{2} H_{j,j} (\theta_j - \hat{\theta}_j)^2 \right] + \sum_j \lambda |\theta_j|
 \]
-Note that we can separate the dimensions, i.e.,
+with $H_{j,j} \geq 0, \lambda > 0$. Note that we can separate the dimensions, i.e.,

-\[\mathcal{\tilde R}_{\text{reg}}(\thetab) = \sum_j g_j(\theta_j) \text{ with } g_j(\theta_j) = \frac{1}{2} H_{j,j} (\theta_j - \hat{\theta}_j)^2 + \lambda |\theta_j|.\]
+\[\mathcal{\tilde R}_{\text{reg}}(\thetab) = \sum_j z_j(\theta_j) \text{ with } z_j(\theta_j) = \frac{1}{2} H_{j,j} (\theta_j - \hat{\theta}_j)^2 + \lambda |\theta_j|.\]

-Hence, we can minimize each $g_j$ separately to find the global minimum. \\
-\lz
-
-Each $g_j$ is convex since it is a sum of convex functions. For convex functions, every stationary point is a minimum. \\
+Hence, we can minimize each $z_j$ separately to find the global minimum. \\
 \lz
+If $H_{j,j} = 0$, then $z_j(\theta_j) = \lambda |\theta_j|$, which is clearly minimized by $\hat{\theta}_{\text{Lasso},j} = 0.$ Otherwise, $z_j$ is strictly convex since $\frac{1}{2} H_{j,j} (\theta_j - \hat{\theta}_j)^2$ is strictly convex and the sum of a strictly convex function and a convex function is strictly convex. \\
 \framebreak

-Thus, we analyze the stationary points $\hat{\theta}_{\text{Lasso},j}$ of $g_j.$ \\
+For convex functions, every stationary point is a minimum.
+Thus, we analyze the stationary points $\hat{\theta}_{\text{Lasso},j}$ of $z_j$ for $H_{j,j} > 0.$ \\
 \lz
 For this, we assume we already know the sign of the minimizer and then derive conditions for which our assumption holds. \\
 \lz
@@ -45,7 +44,7 @@
 \lz
 NB:
 \begin{itemize}
-  \item For $\theta_j > 0: \frac{d}{d\theta_j}\vert \theta_j\vert = \frac{d}{d\theta_j}\theta_j = 1$ and
+  \item For $\theta_j > 0: \frac{d}{d\theta_j}\vert \theta_j\vert = \frac{d}{d\theta_j}\theta_j = 1.$
   \item For $\theta_j < 0: \frac{d}{d\theta_j}\vert \theta_j\vert = \frac{d}{d\theta_j}\left(-\theta_j\right) = -1$.
 \end{itemize}
@@ -59,7 +58,7 @@
 \hfill
 \begin{minipage}{0.49\textwidth}
 \begin{align*}
-  \frac{d}{d \theta_j}g_j(\theta_j) &= H_{j,j}\theta_j - H_{j,j} \hat{\theta}_j + \lambda \overset{!}{=} 0 \\
+  \frac{d}{d \theta_j}z_j(\theta_j) &= H_{j,j}\theta_j - H_{j,j} \hat{\theta}_j + \lambda \overset{!}{=} 0 \\
   &\Rightarrow \hat{\theta}_{\text{Lasso},j} = \hat{\theta}_j -\frac{\lambda}{H_{j,j}} > 0 \\
   &\iff \hat{\theta}_j > \frac{\lambda}{H_{j,j}}
@@ -75,7 +74,7 @@
 \hfill
 \begin{minipage}{0.49\textwidth}
 \begin{align*}
-  \frac{d}{d \theta_j}g_j(\theta_j) &= H_{j,j}\theta_j - H_{j,j} \hat{\theta}_j - \lambda \overset{!}{=} 0 \\
+  \frac{d}{d \theta_j}z_j(\theta_j) &= H_{j,j}\theta_j - H_{j,j} \hat{\theta}_j - \lambda \overset{!}{=} 0 \\
   &\Rightarrow \hat{\theta}_{\text{Lasso},j} = \hat{\theta}_j +\frac{\lambda}{H_{j,j}} < 0 \\
   &\iff \hat{\theta}_j < -\frac{\lambda}{H_{j,j}}
@@ -89,16 +88,15 @@
 \end{minipage}
 \hfill
 \begin{minipage}{0.49\textwidth}
-$\Rightarrow$ If $\hat{\theta}_j \in [-\frac{\lambda}{H_{j,j}}, \frac{\lambda}{H_{j,j}}]$ then $g_j$ has no stationary point with $$\hat{\theta}_{\text{Lasso},j} < 0 \text{ or } \hat{\theta}_{\text{Lasso},j} > 0.$$ \\
-However, at least one stationary point must exist since $g_j$ is a regularized convex function with $\lambda > 0.$\\
-$(\Rightarrow$ An equivalent constraint with $\vert\theta_j\vert \leq t \in\R_+$ must exist.)
+$\Rightarrow$ If $\hat{\theta}_j \in [-\frac{\lambda}{H_{j,j}}, \frac{\lambda}{H_{j,j}}]$ then $z_j$ has no stationary point with $$\hat{\theta}_{\text{Lasso},j} < 0 \text{ or } \hat{\theta}_{\text{Lasso},j} > 0.$$
+However, $z_j$ must attain a unique minimum for $H_{j,j} > 0$ since it is strictly convex with $z_j(\theta_j) \to \infty$ as $\vert\theta_j\vert \to \infty$. Because no stationary point with $\theta_j \neq 0$ exists here, $z_j$ is strictly monotonically decreasing for $\theta_j < 0$ and strictly monotonically increasing for $\theta_j > 0$, i.e., the minimizer is $\hat{\theta}_{\text{Lasso},j} = 0$. \\
 \end{minipage}
 \\
 \begin{align*}\Rightarrow \hat{\theta}_{\text{Lasso},j} &=
 \begin{cases}
-  \hat{\theta}_j + \frac{\lambda}{H_{j,j}} &, \text{if} \;\hat{\theta}_j < -\frac{\lambda}{H_{j,j}} \\
-  0 &, \text{if} \;\hat{\theta}_j \in [-\frac{\lambda}{H_{j,j}}, \frac{\lambda}{H_{j,j}}] \\
-  \hat{\theta}_j - \frac{\lambda}{H_{j,j}} &, \text{if} \;\hat{\theta}_j > \frac{\lambda}{H_{j,j}} \\
+  \hat{\theta}_j + \frac{\lambda}{H_{j,j}} &, \text{if} \;\hat{\theta}_j < -\frac{\lambda}{H_{j,j}} \text{ and } H_{j,j} > 0\\
+  0 &, \text{if} \;\hat{\theta}_j \in [-\frac{\lambda}{H_{j,j}}, \frac{\lambda}{H_{j,j}}] \text{ or } H_{j,j} = 0\\
+  \hat{\theta}_j - \frac{\lambda}{H_{j,j}} &, \text{if} \;\hat{\theta}_j > \frac{\lambda}{H_{j,j}} \text{ and } H_{j,j} > 0 \\
 \end{cases}
 \end{align*}
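
The case distinction the patch arrives at is the soft-thresholding operator: each unregularized estimate $\hat{\theta}_j$ is shrunk toward zero by $\lambda / H_{j,j}$ and set exactly to zero whenever it falls inside $[-\frac{\lambda}{H_{j,j}}, \frac{\lambda}{H_{j,j}}]$. A minimal numerical sketch of this update, assuming precomputed unregularized estimates and diagonal Hessian entries (the name soft_threshold and the inputs theta_hat and H_diag are illustrative, not part of the slides):

import numpy as np

def soft_threshold(theta_hat, H_diag, lam):
    """Componentwise lasso minimizer from the case distinction above.

    theta_hat : unregularized estimates hat(theta)_j
    H_diag    : diagonal Hessian entries H_jj >= 0
    lam       : regularization strength lambda > 0
    """
    theta_hat = np.asarray(theta_hat, dtype=float)
    H_diag = np.asarray(H_diag, dtype=float)
    out = np.zeros_like(theta_hat)  # components with H_jj = 0 stay at 0
    pos = H_diag > 0
    thresh = lam / H_diag[pos]
    # shrink by lambda / H_jj; inside [-thresh, thresh] the result is exactly 0
    out[pos] = np.sign(theta_hat[pos]) * np.maximum(np.abs(theta_hat[pos]) - thresh, 0.0)
    return out

# components with |hat(theta)_j| <= lambda / H_jj are zeroed out
print(soft_threshold([0.3, -2.0, 1.5, 0.7], [1.0, 1.0, 2.0, 0.0], lam=0.5))
# expected: [ 0.   -1.5   1.25  0.  ]

Written this way, the three cases for $H_{j,j} > 0$ collapse into the single expression $\operatorname{sign}(\hat{\theta}_j) \max\left(\vert\hat{\theta}_j\vert - \frac{\lambda}{H_{j,j}}, 0\right)$, which is the usual compact form of the operator.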