From c4780fb2a52913f0e1fca1c4a10909d55b9ce7e6 Mon Sep 17 00:00:00 2001 From: ludwigbothmann <46222472+ludwigbothmann@users.noreply.github.com> Date: Tue, 6 Feb 2024 00:48:09 +0100 Subject: [PATCH] Updates from Overleaf --- slides/regularization/slides-regu-others.tex | 21 ++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/slides/regularization/slides-regu-others.tex b/slides/regularization/slides-regu-others.tex index 6062596f..23e7187b 100644 --- a/slides/regularization/slides-regu-others.tex +++ b/slides/regularization/slides-regu-others.tex @@ -35,19 +35,24 @@ \end{vbframe} -\begin{vbframe}{$Lq$ regularization} -Besides $L1$ and $L2$ norm we could use any $Lq$ (quasi-)norm for regularization. +\begin{vbframe}{$Lq$ regularization \citebutton{Knight and Fu, 2000}{https://websites.umich.edu/~jizhu/jizhu/KnightFu-AoS00.pdf}} +Besides $L1$/$L2$ we could use any $Lq$ (quasi-)norm penalty $\lambda \Vert \thetab \Vert_q^q$. \begin{figure} - \scalebox{0.55}{\includegraphics{figure_man/lasso_ridge_hat.png}}\\ + \scalebox{0.53}{\includegraphics{figure_man/lasso_ridge_hat.png}}\\ %\includegraphics[height=2.3cm]{figure_man/contour.pdf} -\caption{\textit{Top:} Ridge and Lasso loss contours and feasible regions. -\textit{Bottom:} Different feasible region shapes for $Lq$ norms $\sum_j |\theta_j|^q$.} +\caption{{\scriptsize \textit{Top:} loss contours and $L1$/$L2$ constraints. +\textit{Bottom:} Constraints for $Lq$ norms $\sum_j |\theta_j|^q$.}} \end{figure} - -Note that for $q<1$ the penalty becomes non-convex (much harder to optimize!) and for $q>1$ no sparsity is obtained - +\vspace{-0.4cm} +{\footnotesize +\begin{itemize} + \item For $q<1$ penalty becomes non-convex but for $q>1$ no sparsity is achieved + \item Non-convex $Lq$ regularization has some nice properties like \textbf{oracle property} \citebutton{Zou, 2006}{http://users.stat.umn.edu/~zouxx019/Papers/adalasso.pdf}: consistent (+asy. unbiased) param estimation and variable selection + \item Downside: non-convexity of penalty makes optimization even harder than $L1$ (no unique global minimum but many bad local minima) +\end{itemize} +} \end{vbframe}