diff --git a/slides/advriskmin/references.bib b/slides/advriskmin/references.bib new file mode 100755 index 00000000..7874fb33 --- /dev/null +++ b/slides/advriskmin/references.bib @@ -0,0 +1,29 @@ +@inproceedings{SOLLICH1999NINTH, + author={Sollich, P.}, + booktitle={1999 Ninth International Conference on Artificial Neural Networks ICANN 99. (Conf. Publ. No. 470)}, + title={Probabilistic interpretations and Bayesian methods for support vector machines}, + year={1999}, + volume={1}, + number={}, + pages={91-96 vol.1}, + keywords={}, + doi={10.1049/cp:19991090}, + url={https://ieeexplore.ieee.org/abstract/document/819547} +} + +@inproceedings{MEYER2021ALTERNATIVE, + title={An alternative probabilistic interpretation of the huber loss}, + author={Meyer, Gregory P}, + booktitle={Proceedings of the ieee/cvf conference on computer vision and pattern recognition}, + pages={5261--5269}, + year={2021}, + url={https://openaccess.thecvf.com/content/CVPR2021/papers/Meyer_An_Alternative_Probabilistic_Interpretation_of_the_Huber_Loss_CVPR_2021_paper.pdf} +} + +@article{SALEH2022STATISTICAL, + title={Statistical properties of the log-cosh loss function used in machine learning}, + author={Saleh, Resve A and Saleh, AK}, + journal={arXiv preprint arXiv:2208.04564}, + year={2022}, + url={https://arxiv.org/pdf/2208.04564} +} diff --git a/slides/advriskmin/slides-advriskmin-bias-variance-decomposition-deepdive.tex b/slides/advriskmin/slides-advriskmin-bias-variance-decomposition-deepdive.tex old mode 100644 new mode 100755 index 79b16f80..01a9150e --- a/slides/advriskmin/slides-advriskmin-bias-variance-decomposition-deepdive.tex +++ b/slides/advriskmin/slides-advriskmin-bias-variance-decomposition-deepdive.tex @@ -4,26 +4,24 @@ \input{../../latex-math/basic-ml} \input{../../latex-math/ml-eval} -\newcommand{\titlefigure}{figure/bias_variance_decomposition-linear_model_bias.png} -\newcommand{\learninggoals}{ +\title{Introduction to Machine Learning} + +\begin{document} + +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Advanced Risk Minimization + }{% Lecture title + Bias-Variance Decomposition (Deep-Dive) + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure/bias_variance_decomposition-linear_model_bias.png + }{ \item Understand how to decompose the generalization error of a learner into \begin{itemize} \item \footnotesize Bias of the learner \item \footnotesize Variance of the learner \item \footnotesize Inherent noise in the data \end{itemize} - } - -\title{Introduction to Machine Learning} -\date{} - -\begin{document} - -\lecturechapter{Bias-Variance Decomposition (Deep-Dive)} -\lecture{Introduction to Machine Learning} - - - +} \begin{vbframe} {Bias-Variance decomposition} diff --git a/slides/advriskmin/slides-advriskmin-bias-variance-decomposition.tex b/slides/advriskmin/slides-advriskmin-bias-variance-decomposition.tex old mode 100644 new mode 100755 index 48f76419..8667f112 --- a/slides/advriskmin/slides-advriskmin-bias-variance-decomposition.tex +++ b/slides/advriskmin/slides-advriskmin-bias-variance-decomposition.tex @@ -4,27 +4,25 @@ \input{../../latex-math/basic-ml} \input{../../latex-math/ml-eval} -\newcommand{\titlefigure}{figure/bias_variance_decomposition-linear_model_bias.png} -\newcommand{\learninggoals}{ +\title{Introduction to Machine Learning} + +\begin{document} + +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Advanced Risk Minimization + }{% Lecture title + Advanced Risk 
Minimization:\\ + Bias-Variance Decomposition + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure/bias_variance_decomposition-linear_model_bias.png + }{ \item Understand how to decompose the generalization error of a learner into \begin{itemize} \item \footnotesize bias of the learner \item \footnotesize variance of the learner \item \footnotesize inherent noise in the data \end{itemize} - } - -\title{Introduction to Machine Learning} -\date{} - -\begin{document} - -\lecturechapter{Advanced Risk Minimization:\\ -Bias-Variance Decomposition} -\lecture{Introduction to Machine Learning} - - - +} \begin{vbframe} {Bias-Variance decomposition} diff --git a/slides/advriskmin/slides-advriskmin-classification-01.tex b/slides/advriskmin/slides-advriskmin-classification-01.tex old mode 100644 new mode 100755 index 726e641c..717228eb --- a/slides/advriskmin/slides-advriskmin-classification-01.tex +++ b/slides/advriskmin/slides-advriskmin-classification-01.tex @@ -11,23 +11,20 @@ \input{../../latex-math/basic-math} \input{../../latex-math/basic-ml} -\newcommand{\titlefigure}{figure/plot_loss_01.png} -\newcommand{\learninggoals}{ - \item Derive the risk minimizer of the 0-1-loss - \item Derive the optimal constant model for the 0-1-loss -} - \title{Introduction to Machine Learning} -% \author{Bernd Bischl, Christoph Molnar, Daniel Schalk, Fabian Scheipl} -\institute{\href{https://compstat-lmu.github.io/lecture_i2ml/}{compstat-lmu.github.io/lecture\_i2ml}} -\date{} - - \begin{document} - -\lecturechapter{0-1-Loss} -\lecture{Introduction to Machine Learning} + +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Advanced Risk Minimization + }{% Lecture title + 0-1-Loss + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure/plot_loss_01.png + }{ + \item Derive the risk minimizer of the 0-1-loss + \item Derive the optimal constant model for the 0-1-loss +} \begin{vbframe}{0-1-Loss} diff --git a/slides/advriskmin/slides-advriskmin-classification-bernoulli.tex b/slides/advriskmin/slides-advriskmin-classification-bernoulli.tex old mode 100644 new mode 100755 index 93965b31..a178f171 --- a/slides/advriskmin/slides-advriskmin-classification-bernoulli.tex +++ b/slides/advriskmin/slides-advriskmin-classification-bernoulli.tex @@ -14,28 +14,23 @@ \input{../../latex-math/ml-eval} \input{../../latex-math/ml-trees} % For the comparison of Brier and Gini index -\newcommand{\titlefigure}{figure/plot_bernoulli_prob} -\newcommand{\learninggoals}{ +\title{Introduction to Machine Learning} + +\begin{document} + +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Advanced Risk Minimization + }{% Lecture title + Bernoulli Loss + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure/plot_bernoulli_prob.png + }{ \item Know the Bernoulli loss and related losses (log-loss, logistic loss, Binomial loss) \item Derive the risk minimizer \item Derive the optimal constant model \item Understand the connection between log-loss and entropy splitting } -\title{Introduction to Machine Learning} -% \author{Bernd Bischl, Christoph Molnar, Daniel Schalk, Fabian Scheipl} -\institute{\href{https://compstat-lmu.github.io/lecture_i2ml/}{compstat-lmu.github.io/lecture\_i2ml}} -\date{} - - - -\begin{document} - -\lecturechapter{Bernoulli Loss} -\lecture{Introduction to Machine Learning} - - - \begin{vbframe}{Bernoulli Loss} \vspace*{-0.5cm} diff --git 
a/slides/advriskmin/slides-advriskmin-classification-brier.tex b/slides/advriskmin/slides-advriskmin-classification-brier.tex old mode 100644 new mode 100755 index 2cb9509e..e0fd43cf --- a/slides/advriskmin/slides-advriskmin-classification-brier.tex +++ b/slides/advriskmin/slides-advriskmin-classification-brier.tex @@ -13,27 +13,23 @@ \input{../../latex-math/basic-ml} \input{../../latex-math/ml-trees} % For the comparison of Brier and Gini index +\title{Introduction to Machine Learning} -\newcommand{\titlefigure}{figure/plot_brier.png} -\newcommand{\learninggoals}{ +\begin{document} + +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Advanced Risk Minimization + }{% Lecture title + Brier Score + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure/plot_brier.png + }{ \item Know the Brier score \item Derive the risk minimizer \item Derive the optimal constant model \item Understand the connection between Brier score and Gini splitting } -\title{Introduction to Machine Learning} -% \author{Bernd Bischl, Christoph Molnar, Daniel Schalk, Fabian Scheipl} -\institute{\href{https://compstat-lmu.github.io/lecture_i2ml/}{compstat-lmu.github.io/lecture\_i2ml}} -\date{} - - -\begin{document} - -\lecturechapter{Brier Score} -\lecture{Introduction to Machine Learning} - - % \begin{vbframe}{Classification Losses: (Naive) L2-Loss} diff --git a/slides/advriskmin/slides-advriskmin-classification-deepdive.tex b/slides/advriskmin/slides-advriskmin-classification-deepdive.tex old mode 100644 new mode 100755 index 37e1f508..ff13f89b --- a/slides/advriskmin/slides-advriskmin-classification-deepdive.tex +++ b/slides/advriskmin/slides-advriskmin-classification-deepdive.tex @@ -11,25 +11,22 @@ \input{../../latex-math/basic-math} \input{../../latex-math/basic-ml} -\newcommand{\titlefigure} -{figure/plot_bernoulli_prob} -\newcommand{\learninggoals}{ - \item Derive the optimal constant model for the binary empirical log loss risk - \item Derive the optimal constant model for the empirical multiclass log loss risk -} +\newcommand{\argminl}{\mathop{\operatorname{arg\,min}}\limits} \title{Introduction to Machine Learning} -% \author{Bernd Bischl, Christoph Molnar, Daniel Schalk, Fabian Scheipl} -\institute{\href{https://compstat-lmu.github.io/lecture_i2ml/}{compstat-lmu.github.io/lecture\_i2ml}} -\date{} - -\newcommand{\argminl}{\mathop{\operatorname{arg\,min}}\limits} \begin{document} - -\lecturechapter{Optimal constant model for the empirical log loss risk (Deep-Dive)} -\lecture{Introduction to Machine Learning} - + +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Advanced Risk Minimization + }{% Lecture title + Optimal constant model for the empirical log loss risk (Deep-Dive) + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure/plot_bernoulli_prob.png + }{ + \item Derive the optimal constant model for the binary empirical log loss risk + \item Derive the optimal constant model for the empirical multiclass log loss risk +} \begin{vbframe}{Binary log loss: Emp. 
Risk Minimizer} diff --git a/slides/advriskmin/slides-advriskmin-classification-furtherlosses.tex b/slides/advriskmin/slides-advriskmin-classification-furtherlosses.tex old mode 100644 new mode 100755 index c0f50130..138d1807 --- a/slides/advriskmin/slides-advriskmin-classification-furtherlosses.tex +++ b/slides/advriskmin/slides-advriskmin-classification-furtherlosses.tex @@ -14,26 +14,23 @@ \input{../../latex-math/ml-eval} \input{../../latex-math/ml-trees} % For the comparison of Brier and Gini index +\title{Introduction to Machine Learning} -\newcommand{\titlefigure}{figure/plot_loss_overview_classif.png} -\newcommand{\learninggoals}{ +\begin{document} + +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Advanced Risk Minimization + }{% Lecture title + Advanced Classification Losses + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure/plot_loss_overview_classif.png + }{ \item Know the (squared) hinge loss \item Know the $L2$ loss defined on scores \item Know the exponential loss \item Know the AUC loss } -\title{Introduction to Machine Learning} -% \author{Bernd Bischl, Christoph Molnar, Daniel Schalk, Fabian Scheipl} -\institute{\href{https://compstat-lmu.github.io/lecture_i2ml/}{compstat-lmu.github.io/lecture\_i2ml}} -\date{} - - -\begin{document} - -\lecturechapter{Advanced Classification Losses} -\lecture{Introduction to Machine Learning} - \begin{vbframe}{Hinge Loss} \begin{itemize} diff --git a/slides/advriskmin/slides-advriskmin-logreg-deepdive.tex b/slides/advriskmin/slides-advriskmin-logreg-deepdive.tex old mode 100644 new mode 100755 index e8faf925..9aaebaa6 --- a/slides/advriskmin/slides-advriskmin-logreg-deepdive.tex +++ b/slides/advriskmin/slides-advriskmin-logreg-deepdive.tex @@ -11,26 +11,23 @@ \input{../../latex-math/basic-math} \input{../../latex-math/basic-ml} -\newcommand{\titlefigure} -{figure/plot_bernoulli_prob} -\newcommand{\learninggoals}{ - \item Derive the gradient of the logistic regression - \item Derive the Hessian of the logistic regression - \item Show that the logistic regression is a convex problem -} +\newcommand{\argminl}{\mathop{\operatorname{arg\,min}}\limits} \title{Introduction to Machine Learning} -% \author{Bernd Bischl, Christoph Molnar, Daniel Schalk, Fabian Scheipl} -\institute{\href{https://compstat-lmu.github.io/lecture_i2ml/}{compstat-lmu.github.io/lecture\_i2ml}} -\date{} - -\newcommand{\argminl}{\mathop{\operatorname{arg\,min}}\limits} \begin{document} - -\lecturechapter{Logistic regression (Deep-Dive)} -\lecture{Introduction to Machine Learning} - + +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Advanced Risk Minimization + }{% Lecture title + Logistic regression (Deep-Dive) + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure/plot_bernoulli_prob.png + }{ + \item Derive the gradient of the logistic regression + \item Derive the Hessian of the logistic regression + \item Show that the logistic regression is a convex problem +} \begin{vbframe}{Logistic regression: Risk Problem} diff --git a/slides/advriskmin/slides-advriskmin-losses-properties.tex b/slides/advriskmin/slides-advriskmin-losses-properties.tex old mode 100644 new mode 100755 index 8df50659..82997e6e --- a/slides/advriskmin/slides-advriskmin-losses-properties.tex +++ b/slides/advriskmin/slides-advriskmin-losses-properties.tex @@ -15,29 +15,24 @@ \usepackage{booktabs} -\newcommand{\titlefigure}{figure_man/vgg_example.png} 
-\newcommand{\learninggoals}{ +\title{Introduction to Machine Learning} + +\begin{document} + +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Advanced Risk Minimization + }{% Lecture title + Properties of Loss Functions + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure_man/vgg_example.png + }{ % \item Understand why you should care about properties of loss functions \item Statistical properties \item Robustness \item Numerical properties \item Some fundamental terminology - } -\title{Introduction to Machine Learning} -% \author{Bernd Bi{}schl, Christoph Molnar, Daniel Schalk, Fabian Scheipl} -\institute{\href{https://compstat-lmu.github.io/lecture_i2ml/}{compstat-lmu.github.io/lecture\_i2ml}} -\date{} - - -\begin{document} - -% ------------------------------------------------------------------------------ - -\lecturechapter{Properties of Loss Functions} -\lecture{Introduction to Machine Learning} - \begin{vbframe}{The role of Loss Functions} Why should we care about the choice of the loss function $\Lxy$? diff --git a/slides/advriskmin/slides-advriskmin-max-likelihood-l2.tex b/slides/advriskmin/slides-advriskmin-max-likelihood-l2.tex old mode 100644 new mode 100755 index 4f9e76e2..15ee779d --- a/slides/advriskmin/slides-advriskmin-max-likelihood-l2.tex +++ b/slides/advriskmin/slides-advriskmin-max-likelihood-l2.tex @@ -12,22 +12,21 @@ \input{../../latex-math/basic-math} \input{../../latex-math/basic-ml} -\newcommand{\titlefigure}{figure/residuals_plot_L2_title.png} -\newcommand{\learninggoals}{ -\item Understand the connection between maximum likelihood and risk minimization -\item Learn the correspondence between a Gaussian error distribution and the L2 loss -} - \title{Introduction to Machine Learning} -% \author{Bernd Bischl, Christoph Molnar, Daniel Schalk, Fabian Scheipl} -\institute{\href{https://compstat-lmu.github.io/lecture_i2ml/}{compstat-lmu.github.io/lecture\_i2ml}} -\date{} \begin{document} - -\lecturechapter{Maximum Likelihood Estimation vs. -Empirical Risk Minimization} -\lecture{Introduction to Machine Learning} + +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Advanced Risk Minimization + }{% Lecture title + Maximum Likelihood Estimation vs. + Empirical Risk Minimization + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure/residuals_plot_L2_title.png + }{ + \item Understand the connection between maximum likelihood and risk minimization + \item Learn the correspondence between a Gaussian error distribution and the L2 loss +} \begin{vbframe}{Maximum Likelihood} @@ -155,7 +154,7 @@ %\item $\thetah \in \argmax_{\thetab} \log\left(\LL(\thetab)\right) \implies $ \end{itemize} %\lz -\item \textbf{But}: The other way around does not always work: We cannot derive a corresponding pdf or error distribution for every loss function -- the Hinge loss is one prominent example, for which some probabilistic interpretation is still possible however, see \citebutton{Sollich, 1999}{https://ieeexplore.ieee.org/abstract/document/819547}. +\item \textbf{But}: The other way around does not always work: We cannot derive a corresponding pdf or error distribution for every loss function -- the Hinge loss is one prominent example, for which some probabilistic interpretation is still possible however, see \citelink{SOLLICH1999NINTH}. 
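A short sketch (editorial addition, not taken from the slide sources) of why no error density corresponds to the hinge loss: writing the margin as $\nu = y\fx$ and trying to define a density proportional to $\exp(-L(\nu))$ fails because the normalizing constant diverges.
% Hedged sketch: the hinge loss L(\nu) = max(0, 1 - \nu) on the margin \nu = y f(x)
% cannot be written as a negative log-likelihood, since exp(-L) is not normalizable:
\[
\int_{-\infty}^{\infty} \exp\bigl(-\max(0, 1 - \nu)\bigr)\, d\nu
= \int_{-\infty}^{1} e^{\nu - 1}\, d\nu + \int_{1}^{\infty} 1\, d\nu
= 1 + \infty .
\]
% Hence no proper pdf p(\nu) \propto exp(-L(\nu)) exists; only indirect probabilistic
% interpretations such as Sollich's remain possible.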
\framebreak diff --git a/slides/advriskmin/slides-advriskmin-max-likelihood-other.tex b/slides/advriskmin/slides-advriskmin-max-likelihood-other.tex old mode 100644 new mode 100755 index 20ac9ee2..ef145d01 --- a/slides/advriskmin/slides-advriskmin-max-likelihood-other.tex +++ b/slides/advriskmin/slides-advriskmin-max-likelihood-other.tex @@ -12,25 +12,21 @@ \input{../../latex-math/basic-math} \input{../../latex-math/basic-ml} -\newcommand{\titlefigure}{figure/residuals_plot_L1_title.png} -\newcommand{\learninggoals}{ - \item Correspondence between Laplace errors and L1 loss - - \item Correspondence between Bernoulli targets and the Bernoulli / log loss - -} - \title{Introduction to Machine Learning} -% \author{Bernd Bischl, Christoph Molnar, Daniel Schalk, Fabian Scheipl} -\institute{\href{https://compstat-lmu.github.io/lecture_i2ml/}{compstat-lmu.github.io/lecture\_i2ml}} -\date{} \begin{document} - -\lecturechapter{Maximum Likelihood Estimation vs. -Empirical Risk Minimization} -\lecture{Introduction to Machine Learning} - + +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Advanced Risk Minimization + }{% Lecture title + Maximum Likelihood Estimation vs. + Empirical Risk Minimization + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure/residuals_plot_L1_title.png + }{ + \item Correspondence between Laplace errors and L1 loss + \item Correspondence between Bernoulli targets and the Bernoulli / log loss +} \begin{vbframe}{Laplace Errors - L1-Loss} @@ -66,7 +62,7 @@ MLE for Laplacian errors = ERM with L1-loss. \begin{itemize} -\item Some losses correspond to more complex or less known error densities, like the Huber loss \citebutton{Meyer, 2021}{https://openaccess.thecvf.com/content/CVPR2021/papers/Meyer_An_Alternative_Probabilistic_Interpretation_of_the_Huber_Loss_CVPR_2021_paper.pdf} +\item Some losses correspond to more complex or less known error densities, like the Huber loss \citelink{MEYER2021ALTERNATIVE} \item Huber density is (unsurprisingly) a hybrid of Gaussian and Laplace diff --git a/slides/advriskmin/slides-advriskmin-pseudo-residuals.tex b/slides/advriskmin/slides-advriskmin-pseudo-residuals.tex old mode 100644 new mode 100755 index f881a761..c7747d35 --- a/slides/advriskmin/slides-advriskmin-pseudo-residuals.tex +++ b/slides/advriskmin/slides-advriskmin-pseudo-residuals.tex @@ -12,22 +12,20 @@ \input{../../latex-math/basic-math} \input{../../latex-math/basic-ml} -\newcommand{\titlefigure}{figure/pseudo_residual_1.png} -\newcommand{\learninggoals}{ -\item Know the concept of pseudo-residuals -\item Understand the relationship between pseudo-residuals and gradient descent -} - \title{Introduction to Machine Learning} -% \author{Bernd Bischl, Christoph Molnar, Daniel Schalk, Fabian Scheipl} -\institute{\href{https://compstat-lmu.github.io/lecture_i2ml/}{compstat-lmu.github.io/lecture\_i2ml}} -\date{} - \begin{document} - -\lecturechapter{Pseudo-Residuals and Gradient Descent} -\lecture{Introduction to Machine Learning} + +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Advanced Risk Minimization + }{% Lecture title + Pseudo-Residuals and Gradient Descent + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure/pseudo_residual_1.png + }{ + \item Know the concept of pseudo-residuals + \item Understand the relationship between pseudo-residuals and gradient descent +} \begin{vbframe}{Pseudo-Residuals} diff --git 
a/slides/advriskmin/slides-advriskmin-regression-further-losses.tex b/slides/advriskmin/slides-advriskmin-regression-further-losses.tex old mode 100644 new mode 100755 index 68b4561d..53b23108 --- a/slides/advriskmin/slides-advriskmin-regression-further-losses.tex +++ b/slides/advriskmin/slides-advriskmin-regression-further-losses.tex @@ -12,8 +12,17 @@ \input{../../latex-math/basic-math} \input{../../latex-math/basic-ml} -\newcommand{\titlefigure}{figure/plot_loss_overview.png} -\newcommand{\learninggoals}{ +\title{Introduction to Machine Learning} + +\begin{document} + +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Advanced Risk Minimization + }{% Lecture title + Advanced Regression Losses + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure/plot_loss_overview.png + }{ \item Know the Huber loss \item Know the log-cosh loss \item Know the Cauchy loss @@ -22,17 +31,6 @@ \item Know the quantile loss } -\title{Introduction to Machine Learning} -% \author{Bernd Bischl, Christoph Molnar, Daniel Schalk, Fabian Scheipl} -\institute{\href{https://compstat-lmu.github.io/lecture_i2ml/}{compstat-lmu.github.io/lecture\_i2ml}} -\date{} - - -\begin{document} - -\lecturechapter{Advanced Regression Losses} -\lecture{Introduction to Machine Learning} - \begin{vbframe}{Advanced Loss Functions} Special loss functions can be used to estimate non-standard posterior components, to measure errors in a custom way or are designed to have special properties like robustness. @@ -96,7 +94,7 @@ \end{vbframe} -\begin{vbframe}{Log-cosh Loss} +\begin{vbframe}{Log-cosh Loss \citelink{SALEH2022STATISTICAL}} % Confirmed with Bernd: use def from https://heartbeat.fritz.ai/5-regression-loss-functions-all-machine-learners-should-know-4fb140e9d4b0 @@ -149,7 +147,7 @@ \end{columns} -The log-cosh approach to obtain a differentiable approximation of the $L1$ loss can also be extended to differentiable quantile/pinball losses. For more details on the log-cosh loss, see \citebutton{Saleh and Saleh, 2024}{https://arxiv.org/pdf/2208.04564}. +The log-cosh approach to obtain a differentiable approximation of the $L1$ loss can also be extended to differentiable quantile/pinball losses. 
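As an editorial illustration (assuming the usual definition on the residual $y - \fx$; not part of the slide sources), the log-cosh loss and its derivative make the smooth-$L1$ behaviour explicit:
% Log-cosh loss and its derivative w.r.t. the prediction:
\[
\Lxy = \log\bigl(\cosh(y - \fx)\bigr), \qquad
\frac{\partial \Lxy}{\partial \fx} = -\tanh(y - \fx).
\]
% For small residuals r, \log\cosh(r) \approx r^2/2 (smooth, L2-like near zero);
% for large |r|, \log\cosh(r) \approx |r| - \log 2 (L1-like, robust),
% and -\tanh(y - f) smoothly approximates -\operatorname{sign}(y - f).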
\framebreak diff --git a/slides/advriskmin/slides-advriskmin-regression-l1-deepdive.tex b/slides/advriskmin/slides-advriskmin-regression-l1-deepdive.tex old mode 100644 new mode 100755 index d745d1db..069aa3b1 --- a/slides/advriskmin/slides-advriskmin-regression-l1-deepdive.tex +++ b/slides/advriskmin/slides-advriskmin-regression-l1-deepdive.tex @@ -12,23 +12,20 @@ \input{../../latex-math/basic-math} \input{../../latex-math/basic-ml} -\newcommand{\titlefigure}{figure/loss_absolute_1.png} -\newcommand{\learninggoals}{ - \item Derive the risk minimizer of the L1-loss - \item Derive the optimal constant model for the L1-loss -} - \title{Introduction to Machine Learning} -% \author{Bernd Bischl, Christoph Molnar, Daniel Schalk, Fabian Scheipl} -\institute{\href{https://compstat-lmu.github.io/lecture_i2ml/}{compstat-lmu.github.io/lecture\_i2ml}} -\date{} - \begin{document} - -\lecturechapter{L1 Risk Minimizer (Deep-Dive)} -\lecture{Introduction to Machine Learning} - + +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Advanced Risk Minimization + }{% Lecture title + L1 Risk Minimizer (Deep-Dive) + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure/loss_absolute_1.png + }{ + \item Derive the risk minimizer of the L1-loss + \item Derive the optimal constant model for the L1-loss +} \begin{vbframe}{L1-Loss: Risk Minimizer} diff --git a/slides/advriskmin/slides-advriskmin-regression-l2-l1.tex b/slides/advriskmin/slides-advriskmin-regression-l2-l1.tex old mode 100644 new mode 100755 index 7e5c4bdb..2e38c2ba --- a/slides/advriskmin/slides-advriskmin-regression-l2-l1.tex +++ b/slides/advriskmin/slides-advriskmin-regression-l2-l1.tex @@ -12,23 +12,22 @@ \input{../../latex-math/basic-math} \input{../../latex-math/basic-ml} -\newcommand{\titlefigure}{figure/loss_quadratic_1.png} -\newcommand{\learninggoals}{ +\title{Introduction to Machine Learning} + +\begin{document} + +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Advanced Risk Minimization + }{% Lecture title + Regression Losses: L2 and L1 loss + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure/loss_quadratic_1.png + }{ \item Derive the risk minimizer of the L2-loss \item Derive the optimal constant model for the L2-loss \item Know risk minimizer and optimal constant model for L1-loss } -\title{Introduction to Machine Learning} -% \author{Bernd Bischl, Christoph Molnar, Daniel Schalk, Fabian Scheipl} -\institute{\href{https://compstat-lmu.github.io/lecture_i2ml/}{compstat-lmu.github.io/lecture\_i2ml}} -\date{} - -\begin{document} - -\lecturechapter{Regression Losses: L2 and L1 loss} -\lecture{Introduction to Machine Learning} - \begin{vbframe}{L2-Loss} \vspace*{-0.5cm} diff --git a/slides/advriskmin/slides-advriskmin-risk-minimizer.tex b/slides/advriskmin/slides-advriskmin-risk-minimizer.tex old mode 100644 new mode 100755 index e7adcb7d..5ac2f4d9 --- a/slides/advriskmin/slides-advriskmin-risk-minimizer.tex +++ b/slides/advriskmin/slides-advriskmin-risk-minimizer.tex @@ -13,24 +13,23 @@ \input{../../latex-math/basic-ml} \input{../../latex-math/ml-hpo} -\newcommand{\titlefigure}{figure_man/optimization_steps.jpeg} -\newcommand{\learninggoals}{ - \item Bayes optimal model (also: risk minimizer, population minimizer) - \item Bayes risk - \item Bayes regret, estimation and approximation error - \item Optimal constant model - \item Consistent learners -} - \title{Introduction to Machine Learning} 
-\institute{\href{https://compstat-lmu.github.io/lecture_i2ml/}{compstat-lmu.github.io/lecture\_i2ml}} -\date{} - \begin{document} - -\lecturechapter{Risk Minimizers} -\lecture{Introduction to Machine Learning} + +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Advanced Risk Minimization + }{% Lecture title + Risk Minimizers + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure_man/optimization_steps.jpeg + }{ + \item Bayes optimal model (also: risk minimizer, population minimizer) + \item Bayes risk + \item Bayes regret, estimation and approximation error + \item Optimal constant model + \item Consistent learners +} \begin{vbframe}{Empirical Risk Minimization} Very often, in ML, we minimize the empirical risk diff --git a/slides/boosting/references.bib b/slides/boosting/references.bib new file mode 100755 index 00000000..e66d7f93 --- /dev/null +++ b/slides/boosting/references.bib @@ -0,0 +1,58 @@ +@article{KNEIB2009VARIABLE, + title={Variable selection and model choice in geoadditive regression models}, + author={Kneib, Thomas and Hothorn, Torsten and Tutz, Gerhard}, + journal={Biometrics}, + volume={65}, + number={2}, + pages={626--634}, + year={2009}, + publisher={Oxford University Press}, + url={https://epub.ub.uni-muenchen.de/2063/1/tr003.pdf} +} + +@article{HOFNER2014MODEL, + title={Model-based boosting in R: a hands-on tutorial using the R package mboost}, + author={Hofner, Benjamin and Mayr, Andreas and Robinzonov, Nikolay and Schmid, Matthias}, + journal={Computational statistics}, + volume={29}, + pages={3--35}, + year={2014}, + publisher={Springer}, + url={https://cran.r-project.org/web/packages/mboost/vignettes/mboost_tutorial.pdf} +} + +@misc{KUMAR2019KAGGLE, + author = {Kumar, Ajay}, + title = {Life Expectancy (WHO)}, + year = {2019}, + publisher = {Kaggle}, + url={https://www.kaggle.com/datasets/kumarajarshi/life-expectancy-who} +} + +@misc{KEARNSOUR, + title={Thoughts on Hypothesis Boosting}, + author={Kearns, Michael}, + year={1988}, + note={Unpublished manuscript}, + url={https://www.cis.upenn.edu/~mkearns/papers/boostnote.pdf} +} + +@article{SCHAPIRE1990STRENGTH, + title={The strength of weak learnability}, + author={Schapire, Robert E}, + journal={Machine learning}, + volume={5}, + pages={197--227}, + year={1990}, + publisher={Springer}, + url={https://link.springer.com/content/pdf/10.1007/BF00116037.pdf} +} + +@inproceedings{FREUND1996EXPERIMENTS, + title={Experiments with a new boosting algorithm}, + author={Freund, Yoav and Schapire, Robert E}, + booktitle={Proceedings of the Thirteenth International Conference on Machine Learning (ICML)}, + volume={96}, + pages={148--156}, + year={1996}, + organization={Citeseer}, + url={https://citeseerx.ist.psu.edu/document?repid=rep1&type=pdf&doi=d186abec952c4348870a73640bf849af9727f5a4} +} \ No newline at end of file diff --git a/slides/boosting/slides-boosting-cwb-advanced.tex b/slides/boosting/slides-boosting-cwb-advanced.tex old mode 100644 new mode 100755 index 60ae60d7..03400c31 --- a/slides/boosting/slides-boosting-cwb-advanced.tex +++ b/slides/boosting/slides-boosting-cwb-advanced.tex @@ -5,22 +5,23 @@ \input{../../latex-math/ml-ensembles.tex} \input{../../latex-math/ml-trees.tex} -\newcommand{\titlefigure}{figure/compboost-illustration-2.png} -\newcommand{\learninggoals}{ +\title{Introduction to Machine Learning} + +\begin{document} + +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Boosting + }{% Lecture title + Gradient Boosting: Advanced CWB + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure/compboost-illustration-2.png + }{ + \item Details of nonlinear BLs and splines + \item Decomposition for splines + \item Fair base learner selection + \item Feature importance and PDPs +} -\title{Introduction to Machine Learning} -\date{} - -\begin{document} - -\lecturechapter{Gradient Boosting: Advanced CWB} -\lecture{Introduction to Machine Learning} - % ------------------------------------------------------------------------------ \begin{vbframe}{Nonlinear base learners} @@ -91,7 +92,7 @@ %\lz -\citebutton{Kneib~et~al., 2009}{https://epub.ub.uni-muenchen.de/2063/1/tr003.pdf} %Kneib~et~al. (2009) +\citelink{KNEIB2009VARIABLE} %Kneib~et~al. (2009) proposed a decomposition of each base learner into a constant, a linear and a nonlinear part. The boosting algorithm will automatically decide which feature to include -- @@ -237,7 +238,7 @@ % It is possible to calculate $\lambda_j$ by applying the Demmler-Reinsch % orthogonalization (see % \citebutton{Hofer et al. (2011)}{https://www.tandfonline.com/doi/abs/10.1198/jcgs.2011.0922}). - +% % link invalid % % (see Hofer et. al. (2011).\textit{\enquote{A framework for unbiased model selection based on boosting.}}).
% Consider the following example of a GAM using splines with 24 B-Spline basis functions: diff --git a/slides/boosting/slides-boosting-cwb-basics.tex b/slides/boosting/slides-boosting-cwb-basics.tex old mode 100644 new mode 100755 index 7b26810d..b26f08fa --- a/slides/boosting/slides-boosting-cwb-basics.tex +++ b/slides/boosting/slides-boosting-cwb-basics.tex @@ -8,21 +8,21 @@ \usepackage{dsfont} \usepackage{transparent} -\newcommand{\titlefigure}{figure/compboost-illustration-2.png} -\newcommand{\learninggoals}{ - \item Concept of CWB - \item Which base learners do we use - \item Built-in feature selection -} - \title{Introduction to Machine Learning} -\date{} \begin{document} -\lecturechapter{Gradient Boosting: CWB Basics 1} -\lecture{Introduction to Machine Learning} - +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Boosting + }{% Lecture title + Gradient Boosting: CWB Basics 1 + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure/compboost-illustration-2.png + }{ + \item Concept of CWB + \item Which base learners do we use + \item Built-in feature selection +} % ------------------------------------------------------------------------------ \begin{vbframe}{Componentwise gradient boosting} diff --git a/slides/boosting/slides-boosting-cwb-basics2.tex b/slides/boosting/slides-boosting-cwb-basics2.tex old mode 100644 new mode 100755 index 4d8d7ef9..6736260d --- a/slides/boosting/slides-boosting-cwb-basics2.tex +++ b/slides/boosting/slides-boosting-cwb-basics2.tex @@ -8,8 +8,17 @@ \usepackage{dsfont} \usepackage{transparent} -\newcommand{\titlefigure}{figure/compboost-illustration-2.png} -\newcommand{\learninggoals}{ +\title{Introduction to Machine Learning} + +\begin{document} + +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Boosting + }{% Lecture title + Gradient Boosting: CWB Basics 2 + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure/compboost-illustration-2.png + }{ \item Handling of categorical features \item Intercept handling \item Practical example @@ -17,14 +26,6 @@ %\item Fair base learner selection } -\title{Introduction to Machine Learning}\date{} - -\begin{document} - -\lecturechapter{Gradient Boosting: CWB Basics 2} -\lecture{Introduction to Machine Learning} - - \begin{vbframe}{Handling of categorical features} Feature $x_j$ with $G$ categories. Two options for encoding: @@ -105,7 +106,7 @@ \begin{itemize} \setlength{\itemsep}{0.8em} \item {\footnotesize Add BL $b_{\text{int}} = \theta$ as potential candidate considered in each iteration and remove intercept from all linear BLs, i.e., $b_j(\xv) = \theta_j x_j$.} - \item {\footnotesize Final intercept is given as $\fm[0](\xv) + \hat{\theta}$. Linear BLs without intercept only make sense if covariates are centered (see \citebutton{\texttt{mboost}}{https://cran.r-project.org/web/packages/mboost/vignettes/mboost_tutorial.pdf} tutorial, p.~7)} + \item {\footnotesize Final intercept is given as $\fm[0](\xv) + \hat{\theta}$. Linear BLs without intercept only make sense if covariates are centered (see \citelink{HOFNER2014MODEL} tutorial, p.~7)} \end{itemize} %\framebreak @@ -164,7 +165,7 @@ \begin{vbframe}{Example: Life expectancy} -Consider the \texttt{life expectancy} data set (WHO, available on \citebutton{Kaggle}{https://www.kaggle.com/datasets/kumarajarshi/life-expectancy-who})\,: regression task to predict life expectancy. 
\\ +Consider the \texttt{life expectancy} data set (WHO, available on \citelink{KUMAR2019KAGGLE})\,: regression task to predict life expectancy. \\ \vspace{0.1cm} We fit a CWB model with linear BLs (with intercept) diff --git a/slides/boosting/slides-boosting-cwb-glm.tex b/slides/boosting/slides-boosting-cwb-glm.tex old mode 100644 new mode 100755 index d814ef49..106c2235 --- a/slides/boosting/slides-boosting-cwb-glm.tex +++ b/slides/boosting/slides-boosting-cwb-glm.tex @@ -5,19 +5,19 @@ \input{../../latex-math/ml-ensembles.tex} \input{../../latex-math/ml-trees.tex} -\newcommand{\titlefigure}{figure/compboost-to-glm-iter10000.png} -\newcommand{\learninggoals}{ - \item Understand relationship of CWB and GLM - %\item -} - -\title{Introduction to Machine Learning}\date{} +\title{Introduction to Machine Learning} \begin{document} -\lecturechapter{Gradient Boosting: CWB and GLMs} -\lecture{Introduction to Machine Learning} - +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Boosting + }{% Lecture title + Gradient Boosting: CWB and GLMs + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure/compboost-to-glm-iter10000.png + }{ + \item Understand relationship of CWB and GLM +} % ------------------------------------------------------------------------------ \begin{vbframe}{Relation to GLM} diff --git a/slides/boosting/slides-boosting-gbm-classification.tex b/slides/boosting/slides-boosting-gbm-classification.tex old mode 100644 new mode 100755 index 0c7916e2..47cc53e3 --- a/slides/boosting/slides-boosting-gbm-classification.tex +++ b/slides/boosting/slides-boosting-gbm-classification.tex @@ -5,21 +5,21 @@ \input{../../latex-math/ml-ensembles.tex} \input{../../latex-math/ml-trees.tex} -\newcommand{\titlefigure}{figure_man/boosting_classif_title.PNG} -\newcommand{\learninggoals}{ - \item GB for binary classification simply uses - Bernoulli or exponential loss - \item For multiclass we fit $g$ discriminant functions in parallel -} - \title{Introduction to Machine Learning} -\date{} \begin{document} -\lecturechapter{Gradient Boosting: Classification} -\lecture{Introduction to Machine Learning} - +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Boosting + }{% Lecture title + Gradient Boosting: Classification + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure/boosting_classif_title.png + }{ + \item GB for binary classification simply uses + Bernoulli or exponential loss + \item For multiclass we fit $g$ discriminant functions in parallel +} \begin{vbframe}{Binary classification} diff --git a/slides/boosting/slides-boosting-gbm-regularization.tex b/slides/boosting/slides-boosting-gbm-regularization.tex old mode 100644 new mode 100755 index cf4b0132..6e2c2a38 --- a/slides/boosting/slides-boosting-gbm-regularization.tex +++ b/slides/boosting/slides-boosting-gbm-regularization.tex @@ -5,22 +5,21 @@ \input{../../latex-math/ml-ensembles.tex} \input{../../latex-math/ml-trees.tex} -\newcommand{\titlefigure}{figure/gbm_sine_title} -\newcommand{\learninggoals}{ - \item Learn about three main regularization options: number of iterations, - tree depth and shrinkage - \item Understand how regularization influences model fit -} - \title{Introduction to Machine Learning} -\date{} \begin{document} -\lecturechapter{Gradient Boosting: Regularization} -\lecture{Introduction to Machine Learning} - - +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty 
+ Boosting + }{% Lecture title + Gradient Boosting: Regularization + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure/gbm_sine_title.png + }{ + \item Learn about three main regularization options: number of iterations, + tree depth and shrinkage + \item Understand how regularization influences model fit +} \begin{vbframe}{Iters, Tree Depth, Learn Rate} diff --git a/slides/boosting/slides-boosting-gbm-with-trees-1.tex b/slides/boosting/slides-boosting-gbm-with-trees-1.tex old mode 100644 new mode 100755 index 02fca8e7..a48bc69a --- a/slides/boosting/slides-boosting-gbm-with-trees-1.tex +++ b/slides/boosting/slides-boosting-gbm-with-trees-1.tex @@ -5,20 +5,20 @@ \input{../../latex-math/ml-ensembles.tex} \input{../../latex-math/ml-trees.tex} -\newcommand{\titlefigure}{figure/gbm_anim_51.png} -\newcommand{\learninggoals}{ - \item Examples for GB with trees - \item Understand relationship between model structure and interaction depth -} - \title{Introduction to Machine Learning} -\date{} \begin{document} -\lecturechapter{Gradient Boosting with Trees 1} -\lecture{Introduction to Machine Learning} - +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Boosting + }{% Lecture title + Gradient Boosting with Trees 1 + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure/gbm_anim_51.png + }{ + \item Examples for GB with trees + \item Understand relationship between model structure and interaction depth +} % ------------------------------------------------------------------------------ \begin{vbframe}{Gradient boosting with trees} diff --git a/slides/boosting/slides-boosting-gbm-with-trees-2.tex b/slides/boosting/slides-boosting-gbm-with-trees-2.tex old mode 100644 new mode 100755 index 8c6fe309..b50b8451 --- a/slides/boosting/slides-boosting-gbm-with-trees-2.tex +++ b/slides/boosting/slides-boosting-gbm-with-trees-2.tex @@ -5,20 +5,20 @@ \input{../../latex-math/ml-ensembles.tex} \input{../../latex-math/ml-trees.tex} -\newcommand{\titlefigure}{figure_man/gbm_leaf_adjustment.pdf} -\newcommand{\learninggoals}{ - \item Loss optimal terminal coefficients - \item GB with trees for multiclass problems -} - \title{Introduction to Machine Learning} -\date{} \begin{document} -\lecturechapter{Gradient Boosting with Trees 2} -\lecture{Introduction to Machine Learning} - +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Boosting + }{% Lecture title + Gradient Boosting with Trees 2 + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure/gbm_leaf_adjustment.pdf + }{ + \item Loss optimal terminal coefficients + \item GB with trees for multiclass problems +} % ------------------------------------------------------------------------------ diff --git a/slides/boosting/slides-boosting-gradient-boosting-concept.tex b/slides/boosting/slides-boosting-gradient-boosting-concept.tex old mode 100644 new mode 100755 index 3bcefee1..8a2a8c52 --- a/slides/boosting/slides-boosting-gradient-boosting-concept.tex +++ b/slides/boosting/slides-boosting-gradient-boosting-concept.tex @@ -5,21 +5,20 @@ \input{../../latex-math/ml-ensembles.tex} \input{../../latex-math/ml-trees.tex} -\newcommand{\titlefigure}{figure/fig-gb-concept-2.png} -\newcommand{\learninggoals}{ - \item Understand idea of forward stagewise modelling - \item Understand fitting process of gradient boosting for regression problems -} - \title{Introduction to Machine Learning} -\date{} 
\begin{document} -\lecturechapter{Gradient Boosting: Concept} -\lecture{Introduction to Machine Learning} - - +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Boosting + }{% Lecture title + Gradient Boosting: Concept + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure/fig-gb-concept-2.png + }{ + \item Understand idea of forward stagewise modelling + \item Understand fitting process of gradient boosting for regression problems +} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % \section{Gradient Boosting} diff --git a/slides/boosting/slides-boosting-intro-adaboost.tex b/slides/boosting/slides-boosting-intro-adaboost.tex old mode 100644 new mode 100755 index 09286a2e..9965be95 --- a/slides/boosting/slides-boosting-intro-adaboost.tex +++ b/slides/boosting/slides-boosting-intro-adaboost.tex @@ -5,22 +5,21 @@ \input{../../latex-math/ml-ensembles.tex} \input{../../latex-math/ml-trees.tex} -\newcommand{\titlefigure}{figure_man/adaboost_example_adjusted.png} -\newcommand{\learninggoals}{ - \item Understand general idea of boosting - \item Learn AdaBoost algorithm - \item Understand difference between bagging and boosting -} - - \title{Introduction to Machine Learning} -\date{} \begin{document} -\lecturechapter{Gradient Boosting: Introduction and AdaBoost} -\lecture{Introduction to Machine Learning} - +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Boosting + }{% Lecture title + Gradient Boosting: Introduction and AdaBoost + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure_man/adaboost_example_adjusted.png + }{ + \item Understand general idea of boosting + \item Learn AdaBoost algorithm + \item Understand difference between bagging and boosting +} % ------------------------------------------------------------------------------ \begin{vbframe}{Introduction to boosting} @@ -58,13 +57,13 @@ \lz \enquote{Does the existence of a weak learner for a certain problem imply -the existence of a strong learner?} \citebutton{Kearns, 1988}{https://www.cis.upenn.edu/~mkearns/papers/boostnote.pdf} +the existence of a strong learner?} \citelink{KEARNSOUR} \lz \begin{itemize} \item \textbf{Weak learners} are defined as a prediction rule with a correct classification rate that is at least slightly better than random guessing (> 50\% accuracy on a balanced binary problem). -\item We call a learner a \textbf{strong learner} \enquote{if there exists a polynomial-time algorithm that achieves low error with high confidence for all concepts in the class} \citebutton{Schapire, 1990}{https://link.springer.com/content/pdf/10.1007/BF00116037.pdf}. +\item We call a learner a \textbf{strong learner} \enquote{if there exists a polynomial-time algorithm that achieves low error with high confidence for all concepts in the class} \citelink{SCHAPIRE1990STRENGTH}. \end{itemize} @@ -84,7 +83,7 @@ \begin{itemize} \item The \textbf{AdaBoost} (Adaptive Boosting) algorithm is a \textbf{boosting} method - for binary classification by \citebutton{Freund and Schapire (1996)}{https://citeseerx.ist.psu.edu/document?repid=rep1&type=pdf&doi=d186abec952c4348870a73640bf849af9727f5a4}. + for binary classification by \citelink{FREUND1996EXPERIMENTS}. \item The base learner is sequentially applied to weighted training observations. 
\item After each base learner fit, currently misclassified observations receive a higher weight for the next iteration, so we focus more on instances that are harder to classify. diff --git a/slides/boosting/slides-boosting-lgm-ctbm.tex b/slides/boosting/slides-boosting-lgm-ctbm.tex old mode 100644 new mode 100755 index 62bc4e16..1e8cde9c --- a/slides/boosting/slides-boosting-lgm-ctbm.tex +++ b/slides/boosting/slides-boosting-lgm-ctbm.tex @@ -5,19 +5,20 @@ \input{../../latex-math/ml-ensembles.tex} \input{../../latex-math/ml-trees.tex} -\newcommand{\titlefigure}{figure/split_finding_2.png} -\newcommand{\learninggoals}{ - \item Know extensions of XGBoost and how they differ - \item Understand areas upon which extensions of XGBoost improve -} - \title{Introduction to Machine Learning} -\date{} \begin{document} -\lecturechapter{Gradient Boosting: Modern Techniques} -\lecture{Introduction to Machine Learning} +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Boosting + }{% Lecture title + Gradient Boosting: Modern Techniques + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure/split_finding_2.png + }{ + \item Know extensions of XGBoost and how they differ + \item Understand areas upon which extensions of XGBoost improve +} % sources: https://homes.cs.washington.edu/~tqchen/pdf/BoostedTree.pdf % sources: https://towardsdatascience.com/boosting-algorithm-xgboost-4d9ec0207d diff --git a/slides/boosting/slides-boosting-regression-illustrations.tex b/slides/boosting/slides-boosting-regression-illustrations.tex old mode 100644 new mode 100755 index 3cec7cd2..ee3c5ba3 --- a/slides/boosting/slides-boosting-regression-illustrations.tex +++ b/slides/boosting/slides-boosting-regression-illustrations.tex @@ -5,20 +5,20 @@ \input{../../latex-math/ml-ensembles.tex} \input{../../latex-math/ml-trees.tex} -\newcommand{\titlefigure}{figure/illustration_title.png} -\newcommand{\learninggoals}{ - \item See simple visualizations of boosting in regression - \item Understand impact of different losses and base learners -} - \title{Introduction to Machine Learning} -\date{} \begin{document} -\lecturechapter{Gradient Boosting: Illustration} -\lecture{Introduction to Machine Learning} - +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Boosting + }{% Lecture title + Gradient Boosting: Illustration + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure/illustration_title.png + }{ + \item See simple visualizations of boosting in regression + \item Understand impact of different losses and base learners +} % ------------------------------------------------------------------------------ \begin{vbframe}{Gradient boosting illustration - GAM} diff --git a/slides/boosting/slides-boosting-xgboost-deepdive.tex b/slides/boosting/slides-boosting-xgboost-deepdive.tex old mode 100644 new mode 100755 index 3925b324..c1ff4932 --- a/slides/boosting/slides-boosting-xgboost-deepdive.tex +++ b/slides/boosting/slides-boosting-xgboost-deepdive.tex @@ -5,20 +5,21 @@ \input{../../latex-math/ml-ensembles.tex} \input{../../latex-math/ml-trees.tex} -\newcommand{\titlefigure}{figure/split_finding_2.png} -\newcommand{\learninggoals}{ - \item Understand details of the regularized risk in XGBoost - \item Understand approximation of loss used in optimization - \item Understand split finding algorithm -} - \title{Introduction to Machine Learning} -\date{} \begin{document} -\lecturechapter{Gradient Boosting: 
Deep Dive XGBoost Optimization} -\lecture{Introduction to Machine Learning} +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Boosting + }{% Lecture title + Gradient Boosting: Deep Dive XGBoost Optimization + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure/split_finding_2.png + }{ + \item Understand details of the regularized risk in XGBoost + \item Understand approximation of loss used in optimization + \item Understand split finding algorithm +} \begin{vbframe}{Risk minimization} diff --git a/slides/boosting/slides-boosting-xgboost.tex b/slides/boosting/slides-boosting-xgboost.tex old mode 100644 new mode 100755 index 63810834..5d5fdee2 --- a/slides/boosting/slides-boosting-xgboost.tex +++ b/slides/boosting/slides-boosting-xgboost.tex @@ -5,20 +5,21 @@ \input{../../latex-math/ml-ensembles.tex} \input{../../latex-math/ml-trees.tex} -\newcommand{\titlefigure}{figure/split_finding_2.png} -\newcommand{\learninggoals}{ - \item Overview over XGB - \item Regularization in XGB - \item Approximate split finding -} - \title{Introduction to Machine Learning} -\date{} \begin{document} -\lecturechapter{Gradient Boosting: XGBoost} -\lecture{Introduction to Machine Learning} +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Boosting + }{% Lecture title + Gradient Boosting: XGBoost + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure/split_finding_2.png + }{ + \item Overview over XGB + \item Regularization in XGB + \item Approximate split finding +} % sources: https://homes.cs.washington.edu/~tqchen/pdf/BoostedTree.pdf % sources: https://towardsdatascience.com/boosting-algorithm-xgboost-4d9ec0207d diff --git a/slides/cod/slides-cod-examples.tex b/slides/cod/slides-cod-examples.tex old mode 100644 new mode 100755 index 640540d4..d0b5e33b --- a/slides/cod/slides-cod-examples.tex +++ b/slides/cod/slides-cod-examples.tex @@ -3,19 +3,19 @@ \input{../../latex-math/basic-math} \input{../../latex-math/basic-ml} -\newcommand{\titlefigure}{figure/knn_density_plot.png} -\newcommand{\learninggoals}{ - \item See how the performance of k-NN and the linear model deteriorates in high-dimensional spaces -} - \title{Introduction to Machine Learning} -\date{} \begin{document} - -\lecturechapter{Curse of Dimensionality - Examples Learning Algorithms} -\lecture{Introduction to Machine Learning} - + +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Curse of Dimensionality + }{% Lecture title + Curse of Dimensionality - Examples Learning Algorithms + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure/knn_density_plot.png + }{ + \item See how the performance of k-NN and the linear model deteriorates in high-dimensional spaces +} \begin{vbframe}{Example: k-NN} diff --git a/slides/cod/slides-cod.tex b/slides/cod/slides-cod.tex old mode 100644 new mode 100755 index 9494804b..23e1ad69 --- a/slides/cod/slides-cod.tex +++ b/slides/cod/slides-cod.tex @@ -3,21 +3,20 @@ \input{../../latex-math/basic-math} \input{../../latex-math/basic-ml} -\newcommand{\titlefigure}{figure_man/hypercube.png} -\newcommand{\learninggoals}{ - \item Understand that our intuition about geometry fails in high-dimensional spaces - \item Understand the effects of the curse of dimensionality -} - \title{Introduction to Machine Learning} -\date{} \begin{document} - -\lecturechapter{Curse of Dimensionality} -\lecture{Introduction to Machine 
Learning} - - + +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Curse of Dimensionality + }{% Lecture title + Curse of Dimensionality + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure_man/hypercube.png + }{ + \item Understand that our intuition about geometry fails in high-dimensional spaces + \item Understand the effects of the curse of dimensionality +} \begin{vbframe}{Curse of dimensionality} diff --git a/slides/feature-selection/references.bib b/slides/feature-selection/references.bib new file mode 100755 index 00000000..fb3bdb10 --- /dev/null +++ b/slides/feature-selection/references.bib @@ -0,0 +1,38 @@ +@article{GUYON2003INTRODUCTION, + title={An introduction to variable and feature selection}, + author={Guyon, Isabelle and Elisseeff, Andr{\'e}}, + journal={Journal of machine learning research}, + volume={3}, + number={Mar}, + pages={1157--1182}, + year={2003}, + url={https://www.jmlr.org/papers/volume3/guyon03a/guyon03a.pdf} +} + +@article{NATARAJAN1995SPARSE, + author={Natarajan, B. K.}, + title={Sparse Approximate Solutions to Linear Systems}, + journal={SIAM Journal on Computing}, + volume={24}, + number={2}, + pages={227-234}, + year={1995}, + doi={10.1137/S0097539792240406}, + url={https://epubs.siam.org/doi/10.1137/S0097539792240406} +} + +@book{MITSUO1996GENETIC, + title={Genetic Algorithms and Engineering Design}, + author={Gen, Mitsuo and Cheng, Runwei}, + year={1996}, + publisher={John Wiley \& Sons}, + isbn={9780470172254}, + url={https://onlinelibrary.wiley.com/doi/book/10.1002/9780470172254} +} + +@misc{OPTIMIZATIONLECTURE, + title={Optimization in Machine Learning - Chapter 08: Evolutionary Algorithms}, + author={slds-lmu}, + year={2021}, + url={https://slds-lmu.github.io/website_optimization/chapters/08_evolutionary/}, +} diff --git a/slides/feature-selection/slides-fs-filters1.tex b/slides/feature-selection/slides-fs-filters1.tex old mode 100644 new mode 100755 index bce79ca8..cde0b6f0 --- a/slides/feature-selection/slides-fs-filters1.tex +++ b/slides/feature-selection/slides-fs-filters1.tex @@ -5,19 +5,22 @@ \input{../../latex-math/basic-math} \input{../../latex-math/basic-ml} \input{../../latex-math/ml-feature-sel} -\newcommand{\titlefigure}{figure_man/correlation_example.png} -\newcommand{\learninggoals}{ +\title{Introduction to Machine Learning} + +\begin{document} + +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Feature Selection + }{% Lecture title + Feature Selection: Filter Methods + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure_man/correlation_example.png + }{ \item Understand how filter methods work and how to apply them for feature selection. \item Know filter methods based on correlation, test statistics, and mutual information.
} -\title{Introduction to Machine Learning} -\date{} -\begin{document} - - \lecturechapter{Feature Selection: Filter Methods} - \lecture{Introduction to Machine Learning} - \begin{vbframe}{Introduction} +\begin{vbframe}{Introduction} \vspace{0.4cm} \begin{itemize} \setlength{\itemsep}{0.8em} diff --git a/slides/feature-selection/slides-fs-filters2.tex b/slides/feature-selection/slides-fs-filters2.tex old mode 100644 new mode 100755 index 0e4bfd90..54cb8c32 --- a/slides/feature-selection/slides-fs-filters2.tex +++ b/slides/feature-selection/slides-fs-filters2.tex @@ -5,19 +5,21 @@ \input{../../latex-math/basic-ml} \input{../../latex-math/ml-feature-sel} -\newcommand{\titlefigure}{figure/guyon_example_correlation.png} -\newcommand{\learninggoals}{ +\title{Introduction to Machine Learning} + +\begin{document} + +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Feature Selection + }{% Lecture title + Feature Selection: Filter Methods (Examples and Caveats) + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure/guyon_example_correlation.png + }{ \item Understand how filter methods can be misleading \item Understand how filters can be applied and tuned } -\title{Introduction to Machine Learning} -\date{} -\begin{document} - - \lecturechapter{Feature Selection: Filter Methods (Examples and Caveats)} - \lecture{Introduction to Machine Learning} - % \begin{vbframe}{Introduction} % \vspace{0.3cm} % \begin{itemize} @@ -132,7 +134,7 @@ \end{figure} \vspace{0.3cm} \footnotesize{\textbf{Information gain from presumably redundant variables}. 2 class problem with indep features. Each class has Gaussian distribution with no covariance. While filter methods suggest redundancy, combination of both vars yields improvement, showing indep vars are not truly redundant. %Right: After 45 degree rotation, showing combination of 2 vars yields separation improvement by factor $\sqrt{2}$, showing i.i.d. vars are not truly redundant. -For further details, see \citebutton{Guyon and Elisseeff, 2003}{https://www.jmlr.org/papers/volume3/guyon03a/guyon03a.pdf}.} +For further details, see \citelink{GUYON2003INTRODUCTION}.} %\footnotesize{\textbf{IG from presumably redundant variables}. Left: 2 class problem with i.i.d. variables. Each class has Gaussian distr. with no covariance. Right: After 45 degree rotation, showing combination of 2 vars yields separation improvement by factor $\sqrt{2}$, showing i.i.d. vars are not truly redundant. For further details, see Guyon and Elisseeff, 2003.} %\footnotesize{Isabelle Guyon, André Elisseeff (2003). An Introduction to Variable and Feature Selection. Journal of Machine Learning Research (3) p. 
1157-1182.} diff --git a/slides/feature-selection/slides-fs-introduction.tex b/slides/feature-selection/slides-fs-introduction.tex old mode 100644 new mode 100755 index 86c1548d..20ceffc1 --- a/slides/feature-selection/slides-fs-introduction.tex +++ b/slides/feature-selection/slides-fs-introduction.tex @@ -3,22 +3,23 @@ \input{../../latex-math/basic-math} \input{../../latex-math/basic-ml} -\newcommand{\titlefigure}{figure_man/feature_sel_vs_extr.png} -\newcommand{\learninggoals}{ +\title{Introduction to Machine Learning} + +\begin{document} + +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Feature Selection + }{% Lecture title + Feature Selection: Introduction + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure_man/feature_sel_vs_extr.png + }{ \item Too many features can be harmful in prediction \item Selection vs. extraction \item Types of selection methods } -\title{Introduction to Machine Learning} -\date{} - -\begin{document} - -\lecturechapter{Feature Selection: Introduction} - \lecture{Introduction to Machine Learning} - - \begin{vbframe}{Introduction} +\begin{vbframe}{Introduction} Feature selection: \\ Finding a well-performing, diff --git a/slides/feature-selection/slides-fs-motivating-examples.tex b/slides/feature-selection/slides-fs-motivating-examples.tex old mode 100644 new mode 100755 index 85b733e5..5995063b --- a/slides/feature-selection/slides-fs-motivating-examples.tex +++ b/slides/feature-selection/slides-fs-motivating-examples.tex @@ -3,22 +3,23 @@ \input{../../latex-math/basic-math} \input{../../latex-math/basic-ml} -\newcommand{\titlefigure}{figure_man/tibshirani_fig_18_1_mod.png} -\newcommand{\learninggoals}{ +\title{Introduction to Machine Learning} + +\begin{document} + +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Feature Selection + }{% Lecture title + Feature Selection: Motivating Examples + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure_man/tibshirani_fig_18_1_mod.png + }{ \item Understand the practical importance of feature selection \item Understand that models with integrated selection do not always work \item Know different categories of selection methods } -\title{Introduction to Machine Learning} -\date{} - -\begin{document} - - \lecturechapter{Feature Selection: Motivating Examples} - \lecture{Introduction to Machine Learning} - - \begin{vbframe}{Motivating example 1: Regularization} +\begin{vbframe}{Motivating example 1: Regularization} In case of $p \gg n$, overfitting becomes increasingly problematic, as can be shown by the following simulation study: \begin{itemize} diff --git a/slides/feature-selection/slides-fs-wrapper.tex b/slides/feature-selection/slides-fs-wrapper.tex old mode 100644 new mode 100755 index 8a1fa33f..b9d243ed --- a/slides/feature-selection/slides-fs-wrapper.tex +++ b/slides/feature-selection/slides-fs-wrapper.tex @@ -4,22 +4,23 @@ \input{../../latex-math/basic-math} \input{../../latex-math/basic-ml} -\newcommand{\titlefigure}{figure_man/varsel_space.png} -\newcommand{\learninggoals}{ +\title{Introduction to Machine Learning} + +\begin{document} + +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Feature Selection + }{% Lecture title + Feature Selection: Wrapper methods + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure_man/varsel_space.png + }{ \item Understand how wrapper methods work \item Forward 
+ backward search, EAs \item Advantages and disadvantages } -\title{Introduction to Machine Learning} -\date{} - -\begin{document} - - \lecturechapter{Feature Selection: Wrapper methods} - \lecture{Introduction to Machine Learning} - - \begin{vbframe}{Introduction} +\begin{vbframe}{Introduction} \begin{itemize} \item Wrapper methods emerge from the idea that different sets of features can be optimal for different learners @@ -82,7 +83,7 @@ \item Objective $\Psi$ can be different functions, e.g., AIC/BIC for LM or cross-validated performance of a learner \item Poses a discrete combinatorial optimization problem over search space of size = $2^p$, i.e., grows exponentially in $p$ (power set)%as it is the power set of $\{1,\ldots,p\}$ %also known as $L_0$ regularization. - \item Unfortunately can not be solved efficiently in general (NP hard; see, e.g., \citebutton{Natarajan, 1995}{https://epubs.siam.org/doi/10.1137/S0097539792240406}) + \item Unfortunately can not be solved efficiently in general (NP hard; see, e.g., \citelink{NATARAJAN1995SPARSE}) \item Can avoid searching entire space by employing efficient search strategies, traversing search space in a ``smart" way %that finds performant feature subsets \end{itemize} @@ -98,7 +99,7 @@ \item Size of search space = $2^p$, i.e., grows exponentially in $p$ as it is the power set of $\{1,\ldots,p\}$ \item Finding best subset is discrete combinatorial optimization problem. %also known as $L_0$ regularization. - \item It can be shown that this problem unfortunately can not be solved efficiently in general (NP hard; see, e.g., \citebutton{Natarajan, 1995}{https://epubs.siam.org/doi/10.1137/S0097539792240406}) + \item It can be shown that this problem unfortunately can not be solved efficiently in general (NP hard; see, e.g., \citelink{NATARAJAN1995SPARSE}) \item We can avoid having to search the entire space by employing efficient search strategies, moving through the search space in a smart way that finds performant feature subsets %\item By employing efficient search strategories, we can avoid searching the entire space. %\item Of course this does not mean that we have to search the entire space, since there are more efficient search strategies. @@ -266,7 +267,7 @@ \hspace{1cm} \includegraphics[width = 0.75\textwidth]{figure/genetic-alg.png} %\hspace{1cm} - \citebutton{Gu \& Cheng, 1996}{https://onlinelibrary.wiley.com/doi/book/10.1002/9780470172254} + \citelink{MITSUO1996GENETIC} \end{column} % \begin{column}{0.5\textwidth} @@ -274,7 +275,7 @@ \setlength{\itemsep}{0.8em} \item Use CV/validation set for evaluation to avoid overfitting \item Choice of $\mu$ and $\lambda$ allows some control over exploration vs. 
exploitation trade-off - \item See our \citebutton{optimization lecture}{https://slds-lmu.github.io/website_optimization/chapters/08_evolutionary/} for further information + \item See our \citelink{OPTIMIZATIONLECTURE} for further information \end{itemize} \end{column} \end{columns} diff --git a/slides/information-theory/references.bib b/slides/information-theory/references.bib new file mode 100755 index 00000000..0c45568a --- /dev/null +++ b/slides/information-theory/references.bib @@ -0,0 +1,32 @@ +@inproceedings{CATICHA2004RELATIVE, + title={Relative entropy and inductive inference}, + author={Caticha, Ariel}, + booktitle={AIP Conference Proceedings}, + volume={707}, + number={1}, + pages={75--96}, + year={2004}, + organization={American Institute of Physics}, + url={https://pubs.aip.org/aip/acp/article-abstract/707/1/75/719597/Relative-Entropy-and-Inductive-Inference} +} + +@book{KHINCHIN1957MATHEMATICAL, + title={Mathematical Foundations of Information Theory}, + author={Khinchin, A.I.A.}, + isbn={9780486604343}, + lccn={57013025}, + series={Dover Books on Mathematics}, + url={https://books.google.de/books/about/Mathematical_Foundations_of_Information.html?id=0uvKF-LT_tMC&redir_esc=y}, + year={1957}, + publisher={Dover Publications}, +} + +@book{JAYNES_2003, + place={Cambridge}, + title={Probability Theory: The Logic of Science}, + publisher={Cambridge University Press}, + author={Jaynes, E. T.}, + editor={Bretthorst, G. Larry}, + year={2003}, + url={https://www.cambridge.org/core/books/probability-theory/9CA08E224FF30123304E6D8935CF1A99} +} diff --git a/slides/information-theory/slides-info-cross-entropy-kld.tex b/slides/information-theory/slides-info-cross-entropy-kld.tex old mode 100644 new mode 100755 index 67d00ed3..3c7c00bc --- a/slides/information-theory/slides-info-cross-entropy-kld.tex +++ b/slides/information-theory/slides-info-cross-entropy-kld.tex @@ -3,20 +3,20 @@ \input{../../latex-math/basic-math} \input{../../latex-math/basic-ml} -\newcommand{\titlefigure}{figure/binary-ce.jpg} -\newcommand{\learninggoals}{ - \item Know the cross-entropy - \item Understand the connection between entropy, cross-entropy, and KL divergence -} - \title{Introduction to Machine Learning} -\date{} \begin{document} - -\lecturechapter{Cross-Entropy and KL} -\lecture{Introduction to Machine Learning} - + +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Information Theory + }{% Lecture title + Cross-Entropy and KL + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure/binary-ce.jpg + }{ + \item Know the cross-entropy + \item Understand the connection between entropy, cross-entropy, and KL divergence +} \begin{vbframe} {Cross-Entropy - Discrete Case} diff --git a/slides/information-theory/slides-info-diffent.tex b/slides/information-theory/slides-info-diffent.tex old mode 100644 new mode 100755 index 82bb7c88..92b537d2 --- a/slides/information-theory/slides-info-diffent.tex +++ b/slides/information-theory/slides-info-diffent.tex @@ -3,19 +3,20 @@ \input{../../latex-math/basic-math} \input{../../latex-math/basic-ml} -\newcommand{\titlefigure}{figure_man/diffent-quant.png} -\newcommand{\learninggoals}{ - \item Know that the entropy expresses expected information for continuous RVs - \item Know the basic properties of the differential entropy -} - \title{Introduction to Machine Learning} -\date{} \begin{document} - -\lecturechapter{Differential Entropy} -\lecture{Introduction to Machine Learning} + +\titlemeta{% Chunk
title (example: CART, Forests, Boosting, ...), can be empty + Information Theory + }{% Lecture title + Differential Entropy + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure_man/diffent-quant.png + }{ + \item Know that the entropy expresses expected information for continuous RVs + \item Know the basic properties of the differential entropy +} \begin{vbframe}{Differential Entropy} \begin{itemize} diff --git a/slides/information-theory/slides-info-entropy.tex b/slides/information-theory/slides-info-entropy.tex old mode 100644 new mode 100755 index b3625f44..42de7838 --- a/slides/information-theory/slides-info-entropy.tex +++ b/slides/information-theory/slides-info-entropy.tex @@ -3,20 +3,20 @@ \input{../../latex-math/basic-math} \input{../../latex-math/basic-ml} -\newcommand{\titlefigure}{figure/entropy_plot_reordering.png} -\newcommand{\learninggoals}{ - \item Entropy measures expected information for discrete RVs - \item Know entropy and its properties -} - \title{Introduction to Machine Learning} -\date{} \begin{document} - -\lecturechapter{Entropy I} -\lecture{Introduction to Machine Learning} - + +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Information Theory + }{% Lecture title + Entropy I + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure/entropy_plot_reordering.png + }{ + \item Entropy measures expected information for discrete RVs + \item Know entropy and its properties +} \begin{vbframe}{Information Theory} diff --git a/slides/information-theory/slides-info-entropy2.tex b/slides/information-theory/slides-info-entropy2.tex old mode 100644 new mode 100755 index aadb616e..41ffea98 --- a/slides/information-theory/slides-info-entropy2.tex +++ b/slides/information-theory/slides-info-entropy2.tex @@ -3,21 +3,22 @@ \input{../../latex-math/basic-math} \input{../../latex-math/basic-ml} -\newcommand{\titlefigure}{figure/entropy_bernoulli.png} -\newcommand{\learninggoals}{ +\title{Introduction to Machine Learning} + +\begin{document} + +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Information Theory + }{% Lecture title + Entropy II + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure/entropy_bernoulli.png + }{ \item Further properties of entropy and joint entropy \item Understand that uniqueness theorem justifies choice of entropy formula \item Maximum entropy principle } -\title{Introduction to Machine Learning} -\date{} - -\begin{document} - -\lecturechapter{Entropy II} -\lecture{Introduction to Machine Learning} - \begin{vbframe}{Entropy of Bernoulli distribution} Let $X$ be Bernoulli / a coin with $\P(X=1) = s$ and $\P(X=0) = 1 - s$. 
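To make the Bernoulli entropy frame above easy to check numerically, here is a minimal R sketch (illustrative only; the helper name entropy_bernoulli does not exist in the repository) that evaluates H(s) = -s log2(s) - (1 - s) log2(1 - s) on a grid and confirms the maximum of 1 bit at s = 0.5:

entropy_bernoulli <- function(s) {
  # Shannon entropy of a Bernoulli(s) variable in bits; 0 * log2(0) is treated as 0
  ifelse(s == 0 | s == 1, 0, -s * log2(s) - (1 - s) * log2(1 - s))
}
s_grid <- seq(0, 1, length.out = 101)
H <- entropy_bernoulli(s_grid)
s_grid[which.max(H)]   # 0.5
max(H)                 # 1 bit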
@@ -75,7 +76,7 @@ \begin{vbframe}{The Uniqueness Theorem} -\citebutton{Khinchin, 1957}{https://books.google.de/books/about/Mathematical_Foundations_of_Information.html?id=0uvKF-LT_tMC&redir_esc=y} showed that the only family of functions satisfying +\citelink{KHINCHIN1957MATHEMATICAL} showed that the only family of functions satisfying \begin{itemize} \item $H(p)$ is continuous in probabilities $p(x)$ \item adding or removing an event with $p(x)=0$ does not change it @@ -96,7 +97,7 @@ \normalsize{$$\mathbb{E}[g_m(X)]=\sum_{x \in \Xspace}g_m(x)p(x) = \alpha_m\,\,\text{for}\,\, m=0,\ldots,M$$} \vspace{-0.4cm} -\textbf{Maximum entropy principle} \citebutton{Jaynes, 2003}{https://www.cambridge.org/core/books/probability-theory/9CA08E224FF30123304E6D8935CF1A99}: Among all feasible distributions satisfying the constraints, choose the one with maximum entropy! +\textbf{Maximum entropy principle} \citelink{JAYNES_2003}: Among all feasible distributions satisfying the constraints, choose the one with maximum entropy! \begin{itemize} \item Motivation: ensure no unwarranted assumptions on $p(x)$ are made beyond what we know. \item MEP follows similar logic to Occam's razor and principle of insufficient reason diff --git a/slides/information-theory/slides-info-kl-ment.tex b/slides/information-theory/slides-info-kl-ment.tex old mode 100644 new mode 100755 index a67295a4..3d14a354 --- a/slides/information-theory/slides-info-kl-ment.tex +++ b/slides/information-theory/slides-info-kl-ment.tex @@ -3,23 +3,22 @@ \input{../../latex-math/basic-math} \input{../../latex-math/basic-ml} -\newcommand{\titlefigure}{figure/kl_log_diff_plot.png} -\newcommand{\learninggoals}{ +\title{Introduction to Machine Learning} + +\begin{document} + +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Information Theory + }{% Lecture title + KL and Maximum Entropy + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure/kl_log_diff_plot.png + }{ \item Know the defining properties of the KL \item Understand the relationship between the maximum entropy principle and minimum discrimination information \item Understand the relationship between Shannon entropy and relative entropy } -\title{Introduction to Machine Learning} -\date{} - -\begin{document} - - - -\lecturechapter{KL and Maximum Entropy} -\lecture{Introduction to Machine Learning} - \begin{vbframe} {Problems with Differential Entropy} Differential entropy compared to the Shannon entropy: \begin{itemize} @@ -28,7 +27,7 @@ \end{itemize} $\Rightarrow$ Differential entropy is not an uncertainty measure and can not be meaningfully used in a maximum entropy framework. \\ \lz -In the following, we derive an alternative measure, namely the KL divergence (relative entropy), that fixes these shortcomings by taking an inductive inference viewpoint. \citebutton{Caticha, 2003}{https://arxiv.org/pdf/physics/0311093.pdf} +In the following, we derive an alternative measure, namely the KL divergence (relative entropy), that fixes these shortcomings by taking an inductive inference viewpoint. 
\citelink{CATICHA2004RELATIVE} \end{vbframe} \begin{vbframe}{Inductive inference} We construct a "new" entropy measure $S(p)$ just by desired properties.\\ diff --git a/slides/information-theory/slides-info-kl-ml.tex b/slides/information-theory/slides-info-kl-ml.tex old mode 100644 new mode 100755 index ff9a06b4..ed9af306 --- a/slides/information-theory/slides-info-kl-ml.tex +++ b/slides/information-theory/slides-info-kl-ml.tex @@ -3,21 +3,20 @@ \input{../../latex-math/basic-math} \input{../../latex-math/basic-ml} -\newcommand{\titlefigure}{figure/normal_distributions.png} -\newcommand{\learninggoals}{ - \item Understand why measuring distribution similarity is important in ML - \item Understand the advantages of forward and reverse KL -} - \title{Introduction to Machine Learning} -\date{} \begin{document} - - - -\lecturechapter{KL for ML} -\lecture{Introduction to Machine Learning} + +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Information Theory + }{% Lecture title + KL for ML + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure/normal_distributions.png + }{ + \item Understand why measuring distribution similarity is important in ML + \item Understand the advantages of forward and reverse KL +} \begin{vbframe} {Measuring Distribution Similarity in ML} \begin{itemize} diff --git a/slides/information-theory/slides-info-kl.tex b/slides/information-theory/slides-info-kl.tex old mode 100644 new mode 100755 index 5d4d5bb1..6bc83745 --- a/slides/information-theory/slides-info-kl.tex +++ b/slides/information-theory/slides-info-kl.tex @@ -3,23 +3,23 @@ \input{../../latex-math/basic-math} \input{../../latex-math/basic-ml} -\newcommand{\titlefigure}{figure/kl_norm_lp_sigma.png} -\newcommand{\learninggoals}{ +\title{Introduction to Machine Learning} + +\begin{document} + +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Information Theory + }{% Lecture title + Kullback-Leibler Divergence + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure/kl_norm_lp_sigma.png + }{ \item Know the KL divergence as distance between distributions \item Understand KL as expected log-difference \item Understand how KL can be used as loss \item Understand that KL is equivalent to the expected likelihood ratio } -\title{Introduction to Machine Learning} -\date{} - -\begin{document} - -\lecturechapter{Kullback-Leibler Divergence} -\lecture{Introduction to Machine Learning} - - \begin{vbframe} {Kullback-Leibler Divergence} We now want to establish a measure of distance between (discrete or continuous) distributions with the same support for $X \sim p(X)$: diff --git a/slides/information-theory/slides-info-mi-deepdive.tex b/slides/information-theory/slides-info-mi-deepdive.tex old mode 100644 new mode 100755 index e39bf338..12282ab8 --- a/slides/information-theory/slides-info-mi-deepdive.tex +++ b/slides/information-theory/slides-info-mi-deepdive.tex @@ -3,19 +3,19 @@ \input{../../latex-math/basic-math} \input{../../latex-math/basic-ml} -\newcommand{\titlefigure}{figure/correlation_plot.png} -\newcommand{\learninggoals}{ - \item Understand why MI is invariant under certain reparametrizations -} - \title{Introduction to Machine Learning} -\date{} \begin{document} - -\lecturechapter{Mutual Information under Reparametrization (Deep-Dive)} -\lecture{Introduction to Machine Learning} - + +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Information 
Theory + }{% Lecture title + Mutual Information under Reparametrization (Deep-Dive) + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure/correlation_plot.png + }{ + \item Understand why MI is invariant under certain reparametrizations +} \begin{vbframe} {Mutual information properties} \begin{itemize} diff --git a/slides/information-theory/slides-info-ml.tex b/slides/information-theory/slides-info-ml.tex old mode 100644 new mode 100755 index 9e05f927..4779ecac --- a/slides/information-theory/slides-info-ml.tex +++ b/slides/information-theory/slides-info-ml.tex @@ -3,23 +3,22 @@ \input{../../latex-math/basic-math} \input{../../latex-math/basic-ml} -\newcommand{\titlefigure}{figure_man/multinoulli.png} -\newcommand{\learninggoals}{ +\title{Introduction to Machine Learning} + +\begin{document} + +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Information Theory + }{% Lecture title + Information Theory for Machine Learning + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure_man/multinoulli.png + }{ \item Minimizing KL =\\ maximizing log-likelihood \item Minimizing KL =\\ minimizing cross-entropy \item Minimizing CE between modeled and observed probabilities =\\log-loss minimization } -\title{Introduction to Machine Learning} -\date{} - -\begin{document} - -\lecturechapter{Information Theory for Machine Learning} -\lecture{Introduction to Machine Learning} - - - \begin{vbframe}{KL vs Maximum Likelihood} Minimizing KL between the true distribution $p(x)$ and approximating model $q(x|\thetab)$ is equivalent to maximizing the log-likelihood. \begin{align*} diff --git a/slides/information-theory/slides-info-mutual-info.tex b/slides/information-theory/slides-info-mutual-info.tex old mode 100644 new mode 100755 index 44423cac..9caaf59d --- a/slides/information-theory/slides-info-mutual-info.tex +++ b/slides/information-theory/slides-info-mutual-info.tex @@ -3,22 +3,22 @@ \input{../../latex-math/basic-math} \input{../../latex-math/basic-ml} -\newcommand{\titlefigure}{figure/entropy_plot.png} -\newcommand{\learninggoals}{ +\title{Introduction to Machine Learning} + +\begin{document} + +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Information Theory + }{% Lecture title + Joint Entropy and Mutual Information I + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure/entropy_plot.png + }{ \item Know the joint entropy \item Know conditional entropy as remaining uncertainty \item Know mutual information as the amount of information of an RV obtained by another } -\title{Introduction to Machine Learning} -\date{} - -\begin{document} - -\lecturechapter{Joint Entropy and Mutual Information I} -\lecture{Introduction to Machine Learning} - - \begin{vbframe}{Joint entropy} \begin{itemize} \item Recap: The \textbf{joint entropy} of two discrete RVs $X$ and $Y$ with joint pmf $p(x, y)$ is: diff --git a/slides/information-theory/slides-info-mutual-info2.tex b/slides/information-theory/slides-info-mutual-info2.tex old mode 100644 new mode 100755 index 67e7c631..475e7e3b --- a/slides/information-theory/slides-info-mutual-info2.tex +++ b/slides/information-theory/slides-info-mutual-info2.tex @@ -3,19 +3,20 @@ \input{../../latex-math/basic-math} \input{../../latex-math/basic-ml} -\newcommand{\titlefigure}{figure/correlation_plot.png} -\newcommand{\learninggoals}{ - \item Know mutual information as the amount of information of an RV 
obtained by another - \item Know properties of MI -} - \title{Introduction to Machine Learning} -\date{} \begin{document} - -\lecturechapter{Joint Entropy and Mutual Information II} -\lecture{Introduction to Machine Learning} + +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Information Theory + }{% Lecture title + Joint Entropy and Mutual Information II + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure/correlation_plot.png + }{ + \item Know mutual information as the amount of information of an RV obtained by another + \item Know properties of MI +} \begin{vbframe}{Mutual Information - Corollaries} diff --git a/slides/information-theory/slides-info-sourcecoding.tex b/slides/information-theory/slides-info-sourcecoding.tex old mode 100644 new mode 100755 index 4edd9f32..9ee4e4d2 --- a/slides/information-theory/slides-info-sourcecoding.tex +++ b/slides/information-theory/slides-info-sourcecoding.tex @@ -3,22 +3,22 @@ \input{../../latex-math/basic-math} \input{../../latex-math/basic-ml} -\newcommand{\titlefigure}{figure_man/equal_decode.png} -\newcommand{\learninggoals}{ +\title{Introduction to Machine Learning} + +\begin{document} + +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Information Theory + }{% Lecture title + Entropy and Optimal Code Length + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure_man/equal_decode.png + }{ \item Know that source coding is about encoding messages efficiently \item Know how to compute the average length of a code \item Know that the entropy of the source distribution is the lower bound for the average code length } -\title{Introduction to Machine Learning} -\date{} - -\begin{document} - -\lecturechapter{Entropy and Optimal Code Length} -\lecture{Introduction to Machine Learning} - - \begin{vbframe} {Source Coding} \begin{itemize} \item There is an interesting connection between entropy and a subfield of information theory known as \textbf{source coding}. 
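The source-coding learning goals above (computing the average code length, entropy of the source distribution as its lower bound) can be illustrated with a small R sketch; the distribution and code lengths below are made-up illustrative values, not taken from the slides. For a dyadic source distribution, the expected length of the matching prefix code attains the entropy bound exactly:

p    <- c(A = 0.5, B = 0.25, C = 0.125, D = 0.125)   # source distribution
lens <- c(A = 1,   B = 2,    C = 3,     D = 3)       # lengths of the prefix code 0, 10, 110, 111
avg_len <- sum(p * lens)       # expected code length: 1.75 bits per symbol
H <- -sum(p * log2(p))         # source entropy: 1.75 bits
avg_len >= H                   # TRUE, with equality because p is dyadic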
diff --git a/slides/information-theory/slides-info-sourcecoding2.tex b/slides/information-theory/slides-info-sourcecoding2.tex old mode 100644 new mode 100755 index a63110a7..850f8cb5 --- a/slides/information-theory/slides-info-sourcecoding2.tex +++ b/slides/information-theory/slides-info-sourcecoding2.tex @@ -3,21 +3,20 @@ \input{../../latex-math/basic-math} \input{../../latex-math/basic-ml} -\newcommand{\titlefigure}{figure_man/xent_pq.png} -\newcommand{\learninggoals}{ - \item Know connection between source coding and (cross-)entropy - \item Know that the entropy of the source distribution is the lower bound for the average code length -} - \title{Introduction to Machine Learning} -\date{} \begin{document} - -\lecturechapter{Source Coding and Cross-Entropy} -\lecture{Introduction to Machine Learning} - - + +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Information Theory + }{% Lecture title + Source Coding and Cross-Entropy + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure_man/xent_pq.png + }{ + \item Know connection between source coding and (cross-)entropy + \item Know that the entropy of the source distribution is the lower bound for the average code length +} %%%%%%% CUT HERE SECOND SOURCE CODING CHUNK \begin{vbframe} {Source coding and cross-entropy} diff --git a/slides/linear-svm/slides-linsvm-erm.tex b/slides/linear-svm/slides-linsvm-erm.tex old mode 100644 new mode 100755 index 0ebf5477..ac48ad59 --- a/slides/linear-svm/slides-linsvm-erm.tex +++ b/slides/linear-svm/slides-linsvm-erm.tex @@ -4,21 +4,20 @@ \input{../../latex-math/basic-ml} \input{../../latex-math/ml-svm} -\newcommand{\titlefigure}{figure/other_losses.png} -\newcommand{\learninggoals}{ - \item Know why the SVM problem can be understood as (regularized) empirical risk minimization problem - \item Know that the corresponding loss is the hinge loss -} - \title{Introduction to Machine Learning} -\date{} \begin{document} -\lecturechapter{SVMs and Empirical Risk Minimization} -\lecture{Introduction to Machine Learning} - -\sloppy +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Linear Support Vector Machines + }{% Lecture title + SVMs and Empirical Risk Minimization + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure/other_losses.png + }{ + \item Know why the SVM problem can be understood as (regularized) empirical risk minimization problem + \item Know that the corresponding loss is the hinge loss +} \begin{vbframe}{Regularized empirical risk minimization} diff --git a/slides/linear-svm/slides-linsvm-hard-margin-dual.tex b/slides/linear-svm/slides-linsvm-hard-margin-dual.tex old mode 100644 new mode 100755 index a60beed9..e17c0402 --- a/slides/linear-svm/slides-linsvm-hard-margin-dual.tex +++ b/slides/linear-svm/slides-linsvm-hard-margin-dual.tex @@ -4,22 +4,19 @@ \input{../../latex-math/basic-ml} \input{../../latex-math/ml-svm} - -\newcommand{\titlefigure}{figure/svm_geometry} -\newcommand{\learninggoals}{ - \item Know how to derive the SVM dual problem -} - \title{Introduction to Machine Learning} -\date{} \begin{document} -\lecturechapter{Hard-Margin SVM Dual} -\lecture{Introduction to Machine Learning} - -\sloppy - +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Linear Support Vector Machines + }{% Lecture title + Hard-Margin SVM Dual + }{% Relative path to title page image: Can be empty but must not start with slides/ + 
figure/svm_geometry.png + }{ + \item Know how to derive the SVM dual problem +} %\begin{vbframe}{Constrained Optimization} diff --git a/slides/linear-svm/slides-linsvm-hard-margin.tex b/slides/linear-svm/slides-linsvm-hard-margin.tex old mode 100644 new mode 100755 index 0b182dc4..7cb430b4 --- a/slides/linear-svm/slides-linsvm-hard-margin.tex +++ b/slides/linear-svm/slides-linsvm-hard-margin.tex @@ -5,23 +5,21 @@ % attic content on platt scaling: %https://github.com/slds-lmu/lecture_sl/blob/fddf79ea7701306085fccd20c102f1418c46749a/attic/xx-posterior-probs/slides.Rnw#L22 -\newcommand{\titlefigure}{figure/svm_geometry.png} -\newcommand{\learninggoals}{ - \item Know that the hard-margin SVM maximizes the margin between data points and hyperplane - \item Know that this is a quadratic program - \item Know that support vectors are the data points closest to the separating hyperplane -} - \title{Introduction to Machine Learning} -\date{} \begin{document} -\lecturechapter{Linear Hard Margin SVM} -\lecture{Introduction to Machine Learning} - -\sloppy - +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Linear Support Vector Machines + }{% Lecture title + Linear Hard Margin SVM + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure/svm_geometry.png + }{ + \item Know that the hard-margin SVM maximizes the margin between data points and hyperplane + \item Know that this is a quadratic program + \item Know that support vectors are the data points closest to the separating hyperplane +} \begin{vbframe}{Linear classifiers} diff --git a/slides/linear-svm/slides-linsvm-optimization.tex b/slides/linear-svm/slides-linsvm-optimization.tex old mode 100644 new mode 100755 index 3e44403c..80a8fa41 --- a/slides/linear-svm/slides-linsvm-optimization.tex +++ b/slides/linear-svm/slides-linsvm-optimization.tex @@ -4,24 +4,21 @@ \input{../../latex-math/basic-ml} \input{../../latex-math/ml-svm} -\newcommand{\titlefigure}{figure/svm_training_03.png} -\newcommand{\learninggoals}{ - \item Know that the SVM problem is not differentiable - \item Know how to optimize the SVM problem in the primal via subgradient descent - \item Know how to optimize SVM in the dual formulation via pairwise coordinate ascent -} - \title{Introduction to Machine Learning} -\date{} \begin{document} -\lecturechapter{Support Vector Machine Training} -\lecture{Introduction to Machine Learning} - -\sloppy - - +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Linear Support Vector Machines + }{% Lecture title + Support Vector Machine Training + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure/svm_training_03.png + }{ + \item Know that the SVM problem is not differentiable + \item Know how to optimize the SVM problem in the primal via subgradient descent + \item Know how to optimize SVM in the dual formulation via pairwise coordinate ascent +} \begin{vbframe}{Support vector machine training} diff --git a/slides/linear-svm/slides-linsvm-soft-margin.tex b/slides/linear-svm/slides-linsvm-soft-margin.tex old mode 100644 new mode 100755 index c1ecb904..08660e93 --- a/slides/linear-svm/slides-linsvm-soft-margin.tex +++ b/slides/linear-svm/slides-linsvm-soft-margin.tex @@ -4,22 +4,21 @@ \input{../../latex-math/basic-ml} \input{../../latex-math/ml-svm} -\newcommand{\titlefigure}{figure/soft_margin_svs.png} -\newcommand{\learninggoals}{ - \item Understand that the hard-margin SVM problem is only solvable for linearly 
separable data - \item Know that the soft-margin SVM problem therefore allows margin violations - \item The degree to which margin violations are tolerated is controlled by a hyperparameter -} - \title{Introduction to Machine Learning} -\date{} \begin{document} -\lecturechapter{Soft-Margin SVM} -\lecture{Introduction to Machine Learning} - -\sloppy +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Linear Support Vector Machines + }{% Lecture title + Soft-Margin SVM + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure/soft_margin_svs.png + }{ + \item Understand that the hard-margin SVM problem is only solvable for linearly separable data + \item Know that the soft-margin SVM problem therefore allows margin violations + \item The degree to which margin violations are tolerated is controlled by a hyperparameter +} \begin{vbframe}{Non-Separable Data} diff --git a/slides/mathrefresher/slides-probability-theory.tex b/slides/mathrefresher/slides-probability-theory.tex old mode 100644 new mode 100755 index b80ef78f..66f04f88 --- a/slides/mathrefresher/slides-probability-theory.tex +++ b/slides/mathrefresher/slides-probability-theory.tex @@ -13,27 +13,24 @@ \input{../../latex-math/basic-ml} \input{../../latex-math/ml-hpo} - -\newcommand{\titlefigure}{figure_man/math_robot.jpg} -\newcommand{\learninggoals}{ - \item Refresher on the basics of probability theory -% \item Bayes risk -% \item Consistent learners -% \item Bayes regret, estimation and approximation error -% \item Optimal constant model -% \item Proper scoring rules -} - \title{Supervised Learning} -% \author{Bernd Bischl, Christoph Molnar, Daniel Schalk, Fabian Scheipl} -\institute{\href{https://compstat-lmu.github.io/lecture_i2ml/}{compstat-lmu.github.io/lecture\_i2ml}} -\date{} - \begin{document} - -\lecturechapter{Refreshing Mathematical Tools} -\lecture{Supervised Learning} + +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Supervised Learning + }{% Lecture title + Refreshing Mathematical Tools + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure_man/math_robot.jpg + }{ + \item Refresher on the basics of probability theory + % \item Bayes risk + % \item Consistent learners + % \item Bayes regret, estimation and approximation error + % \item Optimal constant model + % \item Proper scoring rules +} \newcommand{\F}{\mathcal{F}} diff --git a/slides/multiclass/slides-mc-binary-reduction.tex b/slides/multiclass/slides-mc-binary-reduction.tex old mode 100644 new mode 100755 index d42a540b..35ccea9c --- a/slides/multiclass/slides-mc-binary-reduction.tex +++ b/slides/multiclass/slides-mc-binary-reduction.tex @@ -3,25 +3,22 @@ \input{../../latex-math/basic-math} \input{../../latex-math/basic-ml} -\newcommand{\titlefigure}{figure_man/one_vs_all.png} -\newcommand{\learninggoals}{ +\title{Introduction to Machine Learning} + +\begin{document} + +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Multiclass Classification + }{% Lecture title + One-vs-Rest and One-vs-One + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure_man/one_vs_all.png + }{ \item Reduce a multiclass problem to multiple binary problems in a model-agnostic way \item Know one-vs-rest reduction \item Know one-vs-one reduction } -\title{Introduction to Machine Learning} -\date{} - -\begin{document} - -\lecturechapter{One-vs-Rest and One-vs-One} 
-\lecture{Introduction to Machine Learning} - - - -\sloppy - \begin{vbframe}{Multiclass to Binary Reduction} diff --git a/slides/multiclass/slides-mc-codebooks.tex b/slides/multiclass/slides-mc-codebooks.tex old mode 100644 new mode 100755 index 44f398f9..8bbbf8ca --- a/slides/multiclass/slides-mc-codebooks.tex +++ b/slides/multiclass/slides-mc-codebooks.tex @@ -3,27 +3,23 @@ \input{../../latex-math/basic-math} \input{../../latex-math/basic-ml} -\newcommand{\titlefigure}{figure_man/hill-climbing.png} -\newcommand{\learninggoals}{ +\title{Introduction to Machine Learning} + +\begin{document} + +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Multiclass Classification + }{% Lecture title + Designing Codebooks and ECOC + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure_man/hill-climbing.png + }{ \item Know what a codebook is \item Understand that codebooks generalize one-vs-one and one-vs-rest \item Know how to define a good codebook and error-correcting output codes (ECOC) \item Know how randomized hill-climbing algorithm is used to find good codebooks } -\title{Introduction to Machine Learning} -\date{} - -\begin{document} - -\lecturechapter{Designing Codebooks and ECOC} -\lecture{Introduction to Machine Learning} - - - - -\sloppy - \section{Designing Codebooks} \begin{vbframe}{Codebooks} diff --git a/slides/multiclass/slides-mc-losses.tex b/slides/multiclass/slides-mc-losses.tex old mode 100644 new mode 100755 index a7c5ded9..5ea2ebc5 --- a/slides/multiclass/slides-mc-losses.tex +++ b/slides/multiclass/slides-mc-losses.tex @@ -3,25 +3,23 @@ \input{../../latex-math/basic-math} \input{../../latex-math/basic-ml} -\newcommand{\titlefigure}{figure/iris_scatter.png} -\newcommand{\learninggoals}{ +\title{Introduction to Machine Learning} + +\begin{document} + +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Multiclass Classification + }{% Lecture title + Multiclass Classification and Losses + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure/iris_scatter.png + }{ \item Know what multiclass means and which types of classifiers exist \item Know the MC 0-1-loss \item Know the MC brier score \item Know the MC logarithmic loss } -\title{Introduction to Machine Learning} -\date{} - -\begin{document} - -\lecturechapter{Multiclass Classification and Losses} -\lecture{Introduction to Machine Learning} - - -\sloppy - \begin{vbframe}{Multiclass Classification} \textbf{Scenario:} Multiclass classification with $g > 2$ classes diff --git a/slides/multiclass/slides-mc-softmax-regression.tex b/slides/multiclass/slides-mc-softmax-regression.tex old mode 100644 new mode 100755 index e0c8fed1..4213deba --- a/slides/multiclass/slides-mc-softmax-regression.tex +++ b/slides/multiclass/slides-mc-softmax-regression.tex @@ -3,22 +3,20 @@ \input{../../latex-math/basic-math} \input{../../latex-math/basic-ml} -\newcommand{\titlefigure}{figure_man/softmax1.png} -\newcommand{\learninggoals}{ - \item Know softmax regression - \item Understand that softmax regression is a generalization of logistic regression -} - \title{Introduction to Machine Learning} -\date{} \begin{document} - -\lecturechapter{Softmax Regression} -\lecture{Introduction to Machine Learning} - -\sloppy - + +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Multiclass Classification + }{% Lecture title + Softmax Regression + }{% Relative path to title page image: Can be empty 
but must not start with slides/ + figure_man/softmax1.png + }{ + \item Know softmax regression + \item Understand that softmax regression is a generalization of logistic regression +} \begin{vbframe}{From logistic regression ...} diff --git a/slides/nonlinear-svm/slides-nonlinsvm-featuregen.tex b/slides/nonlinear-svm/slides-nonlinsvm-featuregen.tex old mode 100644 new mode 100755 index 5518343d..b7e1e5d0 --- a/slides/nonlinear-svm/slides-nonlinsvm-featuregen.tex +++ b/slides/nonlinear-svm/slides-nonlinsvm-featuregen.tex @@ -4,20 +4,20 @@ \input{../../latex-math/basic-ml} \input{../../latex-math/ml-svm} -\newcommand{\titlefigure}{figure/circles_boundary.png} -\newcommand{\learninggoals}{ - \item Understand how nonlinearity can be introduced via feature maps in SVMs - \item Know the limitation of feature maps -} - \title{Introduction to Machine Learning} -\date{} \begin{document} -\lecturechapter{Feature Generation for Nonlinear Separation} -\lecture{Introduction to Machine Learning} - +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Nonlinear Support Vector Machines + }{% Lecture title + Feature Generation for Nonlinear Separation + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure/circles_boundary.png + }{ + \item Understand how nonlinearity can be introduced via feature maps in SVMs + \item Know the limitation of feature maps +} \begin{vbframe}{Nonlinearity via Feature Maps} \begin{itemize} diff --git a/slides/nonlinear-svm/slides-nonlinsvm-kernel-poly.tex b/slides/nonlinear-svm/slides-nonlinsvm-kernel-poly.tex old mode 100644 new mode 100755 index 1b812be5..f03c0634 --- a/slides/nonlinear-svm/slides-nonlinsvm-kernel-poly.tex +++ b/slides/nonlinear-svm/slides-nonlinsvm-kernel-poly.tex @@ -4,21 +4,20 @@ \input{../../latex-math/basic-ml} \input{../../latex-math/ml-svm} -\newcommand{\titlefigure}{figure/svm_poly_kernel_deg_9_coef0_1.png} -\newcommand{\learninggoals}{ - \item Know the homogeneous and non-homogeneous polynomial kernel - \item Understand the influence of the choice of the degree on the decision boundary -} - \title{Introduction to Machine Learning} -\date{} \begin{document} -\lecturechapter{The Polynomial Kernel} -\lecture{Introduction to Machine Learning} - - +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Nonlinear Support Vector Machines + }{% Lecture title + The Polynomial Kernel + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure/svm_poly_kernel_deg_9_coef0_1.png + }{ + \item Know the homogeneous and non-homogeneous polynomial kernel + \item Understand the influence of the choice of the degree on the decision boundary +} \begin{vbframe}{Homogeneous Polynomial Kernel} $$ k(\xv, \xtil) = (\xv^T \xtil)^d, \text{ for } d \in \N$$ diff --git a/slides/nonlinear-svm/slides-nonlinsvm-kernel-rbf.tex b/slides/nonlinear-svm/slides-nonlinsvm-kernel-rbf.tex old mode 100644 new mode 100755 index 3e9912df..bdaa8a57 --- a/slides/nonlinear-svm/slides-nonlinsvm-kernel-rbf.tex +++ b/slides/nonlinear-svm/slides-nonlinsvm-kernel-rbf.tex @@ -4,22 +4,21 @@ \input{../../latex-math/basic-ml} \input{../../latex-math/ml-svm} -\newcommand{\titlefigure}{figure/svm_rbf_as_basis.png} -\newcommand{\learninggoals}{ - \item Know the Gaussian (RBF) kernel - \item Understand that all data sets are separable with this kernel - \item Understand the effect of the kernel hyperparameter $\sigma$ -} - \title{Introduction to Machine Learning} -\date{} 
\begin{document} -\lecturechapter{The Gaussian RBF Kernel} -\lecture{Introduction to Machine Learning} - - +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Nonlinear Support Vector Machines + }{% Lecture title + The Gaussian RBF Kernel + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure/svm_rbf_as_basis.png + }{ + \item Know the Gaussian (RBF) kernel + \item Understand that all data sets are separable with this kernel + \item Understand the effect of the kernel hyperparameter $\sigma$ +} \begin{vbframe}{RBF Kernel} diff --git a/slides/nonlinear-svm/slides-nonlinsvm-kernel-trick.tex b/slides/nonlinear-svm/slides-nonlinsvm-kernel-trick.tex old mode 100644 new mode 100755 index 150eeb0d..bd0224d5 --- a/slides/nonlinear-svm/slides-nonlinsvm-kernel-trick.tex +++ b/slides/nonlinear-svm/slides-nonlinsvm-kernel-trick.tex @@ -4,21 +4,21 @@ \input{../../latex-math/basic-ml} \input{../../latex-math/ml-svm} -\newcommand{\titlefigure}{figure/svm_linear_kernel.png} -\newcommand{\learninggoals}{ - \item Know how to efficiently introduce non-linearity via the kernel trick - \item Know common kernel functions (linear, polynomial, radial) - \item Know how to compute predictions of the kernel SVM -} - \title{Introduction to Machine Learning} -\date{} \begin{document} -\lecturechapter{The Kernel Trick} -\lecture{Introduction to Machine Learning} - +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Nonlinear Support Vector Machines + }{% Lecture title + The Kernel Trick + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure/svm_linear_kernel.png + }{ + \item Know how to efficiently introduce non-linearity via the kernel trick + \item Know common kernel functions (linear, polynomial, radial) + \item Know how to compute predictions of the kernel SVM +} \begin{vbframe}{Dual SVM Problem with Feature Map} diff --git a/slides/nonlinear-svm/slides-nonlinsvm-modelsel.tex b/slides/nonlinear-svm/slides-nonlinsvm-modelsel.tex old mode 100644 new mode 100755 index 6e038172..7fd5bb00 --- a/slides/nonlinear-svm/slides-nonlinsvm-modelsel.tex +++ b/slides/nonlinear-svm/slides-nonlinsvm-modelsel.tex @@ -4,20 +4,20 @@ \input{../../latex-math/basic-ml} \input{../../latex-math/ml-svm} -\newcommand{\titlefigure}{figure_man/rbf_sigma.png} -\newcommand{\learninggoals}{ - \item Know that the SVM is sensitive to hyperparameter choices - \item Understand the effect of different (kernel) hyperparameters -} - \title{Introduction to Machine Learning} -\date{} \begin{document} -\lecturechapter{SVM Model Selection} -\lecture{Introduction to Machine Learning} - +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Nonlinear Support Vector Machines + }{% Lecture title + SVM Model Selection + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure/rbf_sigma.png + }{ + \item Know that the SVM is sensitive to hyperparameter choices + \item Understand the effect of different (kernel) hyperparameters +} \begin{vbframe}{Model Selection for Kernel SVMs} \begin{itemize} diff --git a/slides/nonlinear-svm/slides-nonlinsvm-rkhs-repr.tex b/slides/nonlinear-svm/slides-nonlinsvm-rkhs-repr.tex old mode 100644 new mode 100755 index 364eeae7..f0041336 --- a/slides/nonlinear-svm/slides-nonlinsvm-rkhs-repr.tex +++ b/slides/nonlinear-svm/slides-nonlinsvm-rkhs-repr.tex @@ -4,21 +4,21 @@ \input{../../latex-math/basic-ml} \input{../../latex-math/ml-svm} 
-\newcommand{\titlefigure}{figure/circles_ds.png} -\newcommand{\learninggoals}{ - \item Know that for every kernel there is an associated feature map and space (Mercer's Theorem) - \item Know that this feature map is not unique, and the reproducing kernel Hilbert space (RKHS) is a reference space - \item Know the representation of the solution of a SVM is given by the representer theorem -} - \title{Introduction to Machine Learning} -\date{} \begin{document} -\lecturechapter{Reproducing Kernel Hilbert Space and Representer Theorem} -\lecture{Introduction to Machine Learning} - +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Nonlinear Support Vector Machines + }{% Lecture title + Reproducing Kernel Hilbert Space and Representer Theorem + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure/circles_ds.png + }{ + \item Know that for every kernel there is an associated feature map and space (Mercer's Theorem) + \item Know that this feature map is not unique, and the reproducing kernel Hilbert space (RKHS) is a reference space + \item Know that the representation of the solution of an SVM is given by the representer theorem +} \begin{vbframe}{Kernels: Mercer's Theorem} \begin{itemize} diff --git a/slides/nonlinear-svm/slides-nonlinsvm-uniapprox.tex b/slides/nonlinear-svm/slides-nonlinsvm-uniapprox.tex old mode 100644 new mode 100755 index 77d54b83..e452cbc2 --- a/slides/nonlinear-svm/slides-nonlinsvm-uniapprox.tex +++ b/slides/nonlinear-svm/slides-nonlinsvm-uniapprox.tex @@ -4,21 +4,21 @@ \input{../../latex-math/basic-ml} \input{../../latex-math/ml-svm} -\newcommand{\titlefigure}{figure/circles_ds.png} -\newcommand{\learninggoals}{ - \item Know that SVMs are non-parameteric models - \item Understand the concept of universal consistency - \item Know that SVMs with an universal kernel (e.g. Gaussian kernel) are universally consistent -} - \title{Introduction to Machine Learning} -\date{} \begin{document} -\lecturechapter{Details on Support Vector Machines} -\lecture{Introduction to Machine Learning} - +\titlemeta{% Chunk title (example: CART, Forests, Boosting, ...), can be empty + Nonlinear Support Vector Machines + }{% Lecture title + Details on Support Vector Machines + }{% Relative path to title page image: Can be empty but must not start with slides/ + figure/circles_ds.png + }{ + \item Know that SVMs are non-parametric models + \item Understand the concept of universal consistency + \item Know that SVMs with a universal kernel (e.g.
Gaussian kernel) are universally consistent +} \section{SVMs as Non-Parametric Models} diff --git a/slides/regularization/figure/l2_reg_hess_01_plot.png b/slides/regularization/figure/l2_reg_hess_01_plot.png old mode 100644 new mode 100755 index bd9d5f60..573986c9 Binary files a/slides/regularization/figure/l2_reg_hess_01_plot.png and b/slides/regularization/figure/l2_reg_hess_01_plot.png differ diff --git a/slides/regularization/figure/l2_reg_hess_02_plot.png b/slides/regularization/figure/l2_reg_hess_02_plot.png old mode 100644 new mode 100755 index f3adfaea..88c5602e Binary files a/slides/regularization/figure/l2_reg_hess_02_plot.png and b/slides/regularization/figure/l2_reg_hess_02_plot.png differ diff --git a/slides/regularization/figure/l2_reg_hess_03_plot.png b/slides/regularization/figure/l2_reg_hess_03_plot.png old mode 100644 new mode 100755 index 714fadad..4bee1332 Binary files a/slides/regularization/figure/l2_reg_hess_03_plot.png and b/slides/regularization/figure/l2_reg_hess_03_plot.png differ diff --git a/slides/regularization/figure/l2_reg_hess_04_plot.png b/slides/regularization/figure/l2_reg_hess_04_plot.png old mode 100644 new mode 100755 index 9b081efc..2db62925 Binary files a/slides/regularization/figure/l2_reg_hess_04_plot.png and b/slides/regularization/figure/l2_reg_hess_04_plot.png differ diff --git a/slides/regularization/rsrc/make_l2_reg_hess_plots.R b/slides/regularization/rsrc/make_l2_reg_hess_plots.R old mode 100644 new mode 100755 index 004b2753..e98f52ef --- a/slides/regularization/rsrc/make_l2_reg_hess_plots.R +++ b/slides/regularization/rsrc/make_l2_reg_hess_plots.R @@ -5,7 +5,7 @@ source("utils.R") library(gridExtra) -lambda <- 50 +lambda <- 90 beta_start <- c(0, 0) step_size <- 0.005 grad <- R_emp_grad @@ -35,39 +35,45 @@ theta_min_skew_data <- as.data.frame(t(theta_min_skew)) theta_min_ridge_data <- as.data.frame(t(Q %*% theta_min_skew)) -x1 <- seq(-1.5,2,length.out = 100) -x2 <- seq(-1,3.5,length.out = 100) +x1 <- seq(-2,2,length.out = 100) +x2 <- seq(-1,5,length.out = 100) +#record contour level +p_con <- plot_r_emp(R_emp, x1, x2, bins=25) +ct_data <- ggplot_build(p_con)$data[[2]] +ct_levels <- unique(ct_data$level) +#preserve half to make plots look better +ct_levels <- ct_levels[-seq(3, length(ct_levels), by = 2)] # R_emp -init_cond_plot <- plot_r_emp(R_emp, x1, x2) + +init_cond_plot <- plot_r_emp(R_emp, x1, x2, breaks=ct_levels) + annotate("label", x = 0.75, y = 3, label = "hat(theta)", parse = TRUE, color = 'black', size = 3, fill = "red") + theme(legend.position="none") + coord_fixed() + geom_hline(yintercept = 0, colour="darkgrey", size=1.2) + geom_vline(xintercept = 0, colour="darkgrey", size=1.2) + geom_line(data=rbind(rep(0, num_features), theta_min), - aes(x=V1, y=V2), colour="red", size=1, arrow = arrow(length = unit(0.09, "npc"))) + aes(x=V1, y=V2), colour="red", size=1, arrow = arrow(length = unit(0.06, "npc"))) -rot_plot <- plot_r_emp(R_emp, x1, x2) + +rot_plot <- plot_r_emp(R_emp, x1, x2, breaks=ct_levels) + theme(legend.position="none") + coord_fixed() + geom_abline(slope = Q[2,1]/Q[1,1], colour="darkgrey", size=1.2) + geom_abline(slope = Q[2,2]/Q[1,2], colour="darkgrey", size=1.2) + geom_line(data=rbind(rep(0, num_features), theta_min), - aes(x=V1, y=V2), colour="red", size=1, arrow = arrow(length = unit(0.09, "npc"))) + + aes(x=V1, y=V2), colour="red", size=1, arrow = arrow(length = unit(0.06, "npc"))) + geom_segment(data=cbind(start=as.data.frame(t(c(0,0))), end=theta_proj1_data ), size=0.9, - arrow=arrow(type="closed", length = 
unit(0.09, "npc")), + arrow=arrow(type="closed", length = unit(0.06, "npc")), linetype="dashed", aes(x=start.V1, y=start.V2, xend = end.V1, yend = end.V2), colour = "green", arrow.fill = "green") + geom_segment(data=cbind(start=as.data.frame(t(c(0,0))), end=theta_proj2_data ), size=0.9, - arrow=arrow(type="closed", length = unit(0.09, "npc")), + arrow=arrow(type="closed", length = unit(0.06, "npc")), linetype="dashed", aes(x=start.V1, y=start.V2, xend = end.V1, yend = end.V2), colour = "green", arrow.fill = "green") rs <- sapply(1:2, function(i) S[i,i] / (S[i,i] + lambda)) theta_hat <- theta_proj1_data*rs[1] + theta_proj2_data*rs[2] -geom_l2_plot <- plot_r_emp(R_emp, x1, x2) + +geom_l2_plot <- plot_r_emp(R_emp, x1, x2, breaks=ct_levels) + theme(legend.position="none") + coord_fixed() + geom_hline(yintercept = 0, colour="darkgrey", size=1.2) + geom_vline(xintercept = 0, colour="darkgrey", size=1.2) + @@ -85,20 +91,20 @@ geom_l2_plot <- geom_l2_plot + scale_rot_plot <- rot_plot + geom_segment(data=cbind(start=as.data.frame(t(c(0,0))), end= theta_proj1_data*rs[1] ), size=0.9, - arrow=arrow(type="closed", length = unit(0.09, "npc")), + arrow=arrow(type="closed", length = unit(0.06, "npc")), linetype="dashed", aes(x=start.V1, y=start.V2, xend = end.V1, yend = end.V2), colour = "orange", arrow.fill = "orange") + geom_segment(data=cbind(start=as.data.frame(t(c(0,0))), end= theta_proj2_data*rs[2] ), size=0.9, - arrow=arrow(type="closed", length = unit(0.09, "npc")), + arrow=arrow(type="closed", length = unit(0.06, "npc")), linetype="dashed", aes(x=start.V1, y=start.V2, xend = end.V1, yend = end.V2), colour = "orange", arrow.fill = "orange") + geom_segment(data=cbind(start=as.data.frame(t(c(0,0))), end= theta_proj1_data*rs[1] + theta_proj2_data*rs[2] ), size=0.9, - arrow=arrow(length = unit(0.09, "npc")), + arrow=arrow(length = unit(0.06, "npc")), linetype="solid", aes(x=start.V1, y=start.V2, xend = end.V1, yend = end.V2), colour = "yellow") @@ -109,7 +115,7 @@ scale_plot <- init_cond_plot + geom_segment(data=cbind(start=as.data.frame(t(c(0,0))), end= theta_proj1_data*rs[1] + theta_proj2_data*rs[2] ), size=0.9, - arrow=arrow(length = unit(0.09, "npc")), + arrow=arrow(length = unit(0.06, "npc")), linetype="solid", aes(x=start.V1, y=start.V2, xend = end.V1, yend = end.V2), colour = "yellow") @@ -121,26 +127,13 @@ p2 <- grid.arrange(rot_plot, init_cond_plot, ncol=2) p3 <- grid.arrange(scale_rot_plot, scale_plot, ncol=2) ### contour plot for l2 -#set a wider range -x1 <- seq(-2,2,length.out = 100) -x2 <- seq(-1,5,length.out = 100) - -#calculate ellipse distance -dis_elli <- function(x, y, theta){ - dr1 <- x - beta_true[1] - dr2 <- y - beta_true[2] - data <- cbind(dr1, dr2) - mat <- matrix(c(cos(theta), sin(theta), -sin(theta), cos(theta)), nrow=2) - dr <- data %*% mat - dr[,1] <- dr[,1]/3 #axis ~= 3:1 - apply(dr, 1, dist) -} # Generate data points for plotting circles(ridge) +radius <- sqrt(theta_hat[1]^2 + theta_hat[2]^2)[[1]] #radius for interception point cir_list <- list() seq_data <- seq(0, 2*pi, length.out=100) #points for one circle i <- 1 -for(mul in c(0.15, 0.6, 0.9, 1.26)){ #adjust radius +for(mul in c(radius/8, radius/3, radius/1.5, radius)){ #adjust radius cir_list[[i]] <- data.frame(x=cos(seq_data)*mul, y=sin(seq_data)*mul) i <- i + 1 } @@ -148,40 +141,35 @@ for(mul in c(0.15, 0.6, 0.9, 1.26)){ #adjust radius eval_grid <- expand.grid(x1,x2) eval_grid$r_emp <- apply(eval_grid, 1, R_emp) -#preserve only center part of contour lines -#chose the parameter manually acoording to the plots 
-distance <- dis_elli(eval_grid[,1], eval_grid[,2], theta=-pi/3-0.014) -eval_grid$dist <- distance -eval_grid_sub <- subset(eval_grid, dist < 1.5) - p_elli <- ggplot() + geom_raster(data=eval_grid, aes(x=Var1, y=Var2, fill=r_emp)) + - geom_contour(data=eval_grid_sub, aes(x=Var1, y=Var2, z=r_emp), - colour="white", bins=7) + + geom_contour(data=eval_grid, aes(x=Var1, y=Var2, z=r_emp), + colour="white", breaks = ct_levels[1:2]) + #use only two contour lines theme(legend.position="none") + coord_fixed() + xlab(expression(theta[1])) + ylab(expression(theta[2])) + - #geom_point(aes(x=theta_hat[1], y=theta_hat[2], color="yellow", size=3)) + scale_fill_viridis(end = 0.9) p_ridge <- p_elli + geom_path(data=cir_list[[1]], aes(x, y), color="white", linetype="dashed") + geom_path(data=cir_list[[2]], aes(x, y), color="white", linetype="dashed") + geom_path(data=cir_list[[3]], aes(x, y), color="white", linetype="dashed") + - geom_path(data=cir_list[[4]], aes(x, y), color="white", linetype="dashed") + geom_path(data=cir_list[[4]], aes(x, y), color="white", linetype="dashed") + + ylim(-1, 5) +beta_true <- data.frame(x=beta_true[1], y=beta_true[2]) +theta_hat <- data.frame(x=theta_hat[1][[1]], y=theta_hat[2][[1]]) p4 <- p_ridge + - geom_point(aes(x=beta_true[1], y=beta_true[2]), color="red", size=3) + - geom_point(aes(x=0.73, y=1.03), color="yellow", size=3) +#intersection point - annotate("label", x = 1.1, y = 0.9, label = "hat(theta)[Ridge]", - parse = TRUE, color = 'black', size = 3, fill = "yellow") + - annotate("label", x = 0.75, y = 3, label = "hat(theta)", - parse = TRUE, color = 'black', size = 3, fill = "red") + - geom_hline(yintercept = 0, colour="darkgrey", size=1.2) + - geom_vline(xintercept = 0, colour="darkgrey", size=1.2) + - xlim(-1.4, 1.6) + - ylim(-1, 4.5) + geom_point(data=beta_true, aes(x=x, y=y), color="red", size=3) + + geom_point(data=theta_hat, aes(x=x, y=y), color="yellow", size=3) + + annotate("label", x=1.2, y=0.8, label="hat(theta)[Ridge]", + parse=TRUE, color='black', size=3, fill="yellow") + + annotate("label", x = 0.75, y=3, label="hat(theta)", + parse=TRUE, color='black', size=3, fill="red") + + geom_hline(yintercept=0, colour="darkgrey", size=1.2) + + geom_vline(xintercept=0, colour="darkgrey", size=1.2) + ggsave("../figure/l2_reg_hess_01_plot.png", plot = p1, width = 5.5, height = 3.5, dpi="retina") ggsave("../figure/l2_reg_hess_02_plot.png", plot = p2, width = 5.5, height = 3.5, dpi="retina") diff --git a/slides/regularization/rsrc/utils.R b/slides/regularization/rsrc/utils.R old mode 100644 new mode 100755 index 0a4de01c..601554b6 --- a/slides/regularization/rsrc/utils.R +++ b/slides/regularization/rsrc/utils.R @@ -28,13 +28,13 @@ R_reg_l2 <- function(beta, lambda = 0.1, features = X, target = y){ return(R_emp(beta, features, target) + (0.5*lambda * sum(beta^2))) } -plot_r_emp <- function(r_emp, x1, x2){ +plot_r_emp <- function(r_emp, x1, x2, bins=NULL, breaks=NULL){ eval_grid <- expand.grid(x1,x2) eval_grid$r_emp <- apply(eval_grid, 1, r_emp) ggplot(eval_grid) + geom_raster(aes(x=Var1, y=Var2, fill=r_emp)) + - geom_contour(aes(x=Var1, y=Var2, z=r_emp), colour="white") + + geom_contour(aes(x=Var1, y=Var2, z=r_emp), colour="white", bins=bins, breaks=breaks) + xlab(expression(theta[1])) + ylab(expression(theta[2])) + scale_fill_viridis(end = 0.9)
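For reference, the intended use of the extended plot_r_emp() signature is what make_l2_reg_hess_plots.R now does: draw one plot with a fixed number of bins, record its contour levels, and pass those levels as breaks so that all panels share the same contours. A minimal usage sketch, assuming utils.R has been sourced and R_emp, X, y are set up as in that script:

x1 <- seq(-2, 2, length.out = 100)
x2 <- seq(-1, 5, length.out = 100)
p_con <- plot_r_emp(R_emp, x1, x2, bins = 25)               # fix the number of contour bins once
ct_levels <- unique(ggplot_build(p_con)$data[[2]]$level)    # record the contour levels ggplot chose
p_fixed <- plot_r_emp(R_emp, x1, x2, breaks = ct_levels)    # reuse them so later panels match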