Merge pull request #35 from slds-lmu/use-service-components
jemus42 authored Oct 25, 2024
2 parents fef9088 + 1b8bd66 commit e1692a4
Showing 30 changed files with 1,047 additions and 526 deletions.
19 changes: 19 additions & 0 deletions .editorconfig
@@ -0,0 +1,19 @@
# See https://editorconfig.org/

# top-most EditorConfig file
root = true

# Unix-style newlines with a newline ending every file
[*]
end_of_line = lf
insert_final_newline = true

# Matches multiple files with brace expansion notation
# 2 space indentation
[*.{tex,sty,R,r}]
indent_style = space
indent_size = 2

# Tab indentation (no size specified)
[Makefile]
indent_style = tab
2 changes: 2 additions & 0 deletions .gitattributes
@@ -0,0 +1,2 @@
* text=auto eol=lf

2 changes: 1 addition & 1 deletion .github/workflows/pr-slide-check.yaml
@@ -70,7 +70,7 @@ jobs:
run: |
echo "dir=$(Rscript --quiet -e 'cat(.libPaths()[[1]])')" >> $GITHUB_OUTPUT
- name: Restore R package cache
uses: actions/cache@v3
uses: actions/cache@v4
with:
path: ${{ steps.r-cache.outputs.dir }}
key: ${{ runner.os }}-r-${{inputs.cache-version }}-${{ hashFiles('scripts/install_r_deps.R') }}
113 changes: 68 additions & 45 deletions .gitignore
@@ -1,3 +1,38 @@
#----------------------------------------------------#
# ----- Global gitignore file for all lectures ----- #
# This file is included in slds-lmu/lecture_service #
#----------------------------------------------------#


#--------------------------------------------#
# Things that need to exist but not in git #
#--------------------------------------------#

nospeakermargin.tex
speakermargin.tex

#--------------------------------------------#
# Intermediate output (slides, exercises) #
#--------------------------------------------#

# The only slide PDF output in this repo should be in slides-pdf
# include both slides-xyz (i2ml) and slideXY- / tXY- (iml) formats
slides/*/slides*.pdf
slides/*/t*.pdf

# Similar things apply to exercises and exercises-pdf
exercises/*/*.pdf
exercises/*/*.tex
exercises/*/*.log
exercises/*/figure/unnamed-*.pdf
exercises/*/figure/unnamed-chunk*
exercises/*/*_files/
exercises/*/.ipynb_checkpoints/

#-----------------------------------------------------------------------------#
# TeX intermediate stuff everybody loves to hate and hates to commit to git #
#-----------------------------------------------------------------------------#

*.aux
*.fdb_latexmk
*.fls
@@ -8,51 +8,43 @@
*.toc
*.vrb
*.synctex.gz
*.DS_Store
.Rbuildignore
*.Rproj
*.Rhistory
.Rproj.user
.Rproj.user/
cache/
docs/
exercises/*/*.pdf
!exercises/svm/figure/eps_tubes.pdf
exercises/*/*.tex
exercises/*/*.log
exercises/*/figure/unnamed-*.pdf
exercises/*/figure/unnamed-chunk*
tut_*.pdf
tut_*.tex
# latex-math
slides/advriskmin/slides-*.pdf
slides/online-learning/slides-*.pdf
slides/multitarget/slides-*.pdf
slides/imbalanced-learning/slides-*.pdf
slides/fairness/slides-*.pdf
slides/cod/slides-*.pdf
slides/evaluation/slides-*.pdf
slides/forests/slides-*.pdf
slides/gaussian-processes/slides-*.pdf
slides/hypospaces-capacity/slides-*.pdf
slides/information-theory/slides-*.pdf
slides/knn/slides-*.pdf
slides/linear-svm/slides-*.pdf
slides/mathrefresher/slides-*.pdf
slides/ml-basics/slides-*.pdf
slides/ml-philosophy/slides-*.pdf
slides/mlr3/slides-*.pdf
slides/multiclass/slides-*.pdf
slides/nested-resampling/slides-*.pdf
slides/nonlinear-svm/slides-*.pdf
slides/regularization/slides-*.pdf
slides/supervised-classification/slides-*.pdf
slides/supervised-regression/slides-*.pdf
slides/trees/slides-*.pdf
slides/tuning/slides-*.pdf
slides/boosting/slides-*.pdf
*.synctex(busy)
**.pax # pdfannotextractor / pax
*.pax
**.bbl
**.blg
**.bcf
**.bcf-SAVE-ERROR
**.run.xml

#----------------------------------------------------------#
# Editor-specific stuff that should generally be ignored #
#----------------------------------------------------------#

# vim swap files
*.swp
# http://stratus3d.com/blog/2018/06/03/stop-excluding-editor-temp-files-in-gitignore/
[._]*.s[a-v][a-z]
[._]*.sw[a-p]
[._]s[a-v][a-z]
[._]sw[a-p]

# used for atom editor
.latexcfg
# Xournal files
@@ -61,5 +71,18 @@ slides/boosting/slides-*.pdf
code-demos/code_demo_genclass.R
code-demos/code_demo_knn.R
code-demos/code_demo_limo.R
NAMESPACE
.idea/*
*.pkl

# RStudio / R in general
*.Rproj
*.Rhistory
.Rproj.user
.RData
.Rdata

#-----------------------------------#
# OS-specific temp/preview files #
#-----------------------------------#
*.DS_Store

4 changes: 4 additions & 0 deletions .ignore
@@ -0,0 +1,4 @@
# Complementary to .gitignore, this file also affects tools like ripgrep
slides/attic/*
slides/*/attic
attic
2 changes: 1 addition & 1 deletion exercises/aml-test-exam/ex_rnw/ex_imbalanced_learning.Rnw
@@ -22,7 +22,7 @@ p

\begin{enumerate}
%
\item Let $f(\xv) = 2 \cdot \mathds{1}_{[ \thetab^\top \xv \ge 3]} -1 $ with $\thetab=(1,1)^\top.$ Specify the confusion matrix of $f$ and compute its accuracy as well as its $F_1$ score.
\item Let $f(\xv) = 2 \cdot \mathds{1}_{[ \thetav^\top \xv \ge 3]} -1 $ with $\thetav=(1,1)^\top.$ Specify the confusion matrix of $f$ and compute its accuracy as well as its $F_1$ score.
%
\item Find all Tomek links with respect to the Euclidean distance on $\Xspace$ in the data set and remove the instances in each Tomek link belonging to the majority class. Repeat the computation in (a).
%
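The Tomek-link computation asked for in part (b) of this exercise is mechanical, and a small script may make it concrete. Below is a minimal R sketch under stated assumptions: the exercise's 13 data points are not reproduced in this hunk, so `df` is placeholder data of the same shape (features `x1`, `x2` and labels `y` in $\{-1, +1\}$). Two observations form a Tomek link if they carry different labels and are mutual nearest neighbors under the Euclidean distance.

# Minimal sketch: find Tomek links. `df` is placeholder data;
# substitute the exercise's actual 13 points.
df <- data.frame(
  x1 = c(0, 2, 2, 3, 5),
  x2 = c(1, 1, 2, 3, 5),
  y  = c(-1, -1, +1, +1, +1)
)

D <- as.matrix(dist(df[, c("x1", "x2")]))  # pairwise Euclidean distances
diag(D) <- Inf                             # a point is not its own neighbor
nn <- apply(D, 1, which.min)               # nearest-neighbor index per point

# i forms a Tomek link with nn[i] iff they are mutual nearest neighbors
# and belong to different classes.
is_link <- nn[nn] == seq_len(nrow(df)) & df$y != df$y[nn]
links <- unique(t(apply(cbind(which(is_link), nn[is_link]), 1, sort)))
links  # each row is one Tomek link; drop its majority-class member

Removing the majority-class instance of every such pair and recomputing the metrics is then exactly the comparison the exercise asks for.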
4 changes: 2 additions & 2 deletions exercises/aml-test-exam/ex_rnw/sol_imbalanced_learning.Rnw
@@ -22,15 +22,15 @@ The following data set contains $n = 13$ datapoints, the triangles denote class

\begin{enumerate}
%
\item Let $f(\xv) = 2 \cdot \mathds{1}_{[ \thetab^\top \xv \ge 3]} -1 $ with $\thetab=(1,1)^\top.$ Specify the confusion matrix of $f$ and compute its accuracy as well as its $F_1$ score.
\item Let $f(\xv) = 2 \cdot \mathds{1}_{[ \thetav^\top \xv \ge 3]} -1 $ with $\thetav=(1,1)^\top.$ Specify the confusion matrix of $f$ and compute its accuracy as well as its $F_1$ score.

\item[] {\color{blue} \textbf{Solution:}
%
The decision boundary of the classifier is specified by
%
\begin{align*}
%
\thetab^\top \xv = 3 \quad &\Leftrightarrow \quad x_1+x_2 = 3 \quad \Leftrightarrow \quad x_2 = 3 - x_1.
\thetav^\top \xv = 3 \quad &\Leftrightarrow \quad x_1+x_2 = 3 \quad \Leftrightarrow \quad x_2 = 3 - x_1.
%
\end{align*}
%
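Part (a) of this solution is also easy to verify programmatically. A minimal R sketch, again with placeholder `X` and `y` standing in for the 13 points shown in the exercise's figure:

# Sketch: evaluate f(x) = 2 * 1[theta^T x >= 3] - 1 with theta = (1, 1)^T.
# X and y are placeholders; substitute the actual data.
X <- rbind(c(0, 1), c(1, 1), c(2, 2), c(3, 1), c(1, 3))
y <- c(-1, +1, +1, +1, +1)
theta <- c(1, 1)

pred <- ifelse(drop(X %*% theta) >= 3, +1, -1)

table(truth = y, prediction = pred)  # confusion matrix
mean(pred == y)                      # accuracy

tp <- sum(pred == +1 & y == +1)
fp <- sum(pred == +1 & y == -1)
fn <- sum(pred == -1 & y == +1)
2 * tp / (2 * tp + fp + fn)          # F1 score

All points on or above the line $x_2 = 3 - x_1$ are predicted $+1$, matching the boundary derived above.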
18 changes: 9 additions & 9 deletions exercises/fairness/ex_rnw/ex_fairness_3.Rnw
@@ -10,31 +10,31 @@ Recall that in logistic regression the probability $ p(y= +1\,\vert\,\xv)$ is m
%
\begin{align*}
%
\pi_{\thetab}: \ \Xspace & \to [0,1] \\
\pi_{\thetav}: \ \Xspace & \to [0,1] \\
%
\xv &\mapsto \frac{1}{1 + \exp \big(- \langle \thetab , \xv \rangle \big) } \, ,
\xv &\mapsto \frac{1}{1 + \exp \big(- \langle \thetav , \xv \rangle \big) } \, ,
%
\end{align*}
%
with $\thetab = (\theta_1, \ldots , \theta_d)^\top \in \mathbb{R}^d$ is a parameter vector.
with $\thetav = (\theta_1, \ldots , \theta_d)^\top \in \mathbb{R}^d$ is a parameter vector.

Assume we have a partition of $\Xspace$ into $G\in \mathbb{N}$ groups\footnote{Here, we mean disjoint subsets of $\mathbb{R}^d$ whose union is $\Xspace = \mathbb{R}^d.$}, say $\Xspace_1,\ldots,\Xspace_G.$
%
Consider the following quantity
%
\begin{align*}
%
H_{\thetab}(G) = \sum_{g=1}^G \frac{( O_{g,+1} - E_{g,+1|{\thetab}})^2 }{E_{g,+1|{\thetab}}} + \frac{( O_{g,-1} - E_{g,-1|{\thetab}})^2 }{E_{g,-1|{\thetab}}},
H_{\thetav}(G) = \sum_{g=1}^G \frac{( O_{g,+1} - E_{g,+1|{\thetav}})^2 }{E_{g,+1|{\thetav}}} + \frac{( O_{g,-1} - E_{g,-1|{\thetav}})^2 }{E_{g,-1|{\thetav}}},
%
\end{align*}
%
where $O_{g,\pm1}$ is the number of \emph{observed} $y's$ which are $\pm 1$ and the corresponding $\xv$ is an element of $\Xspace_g,$ and $E_{g,\pm 1|{\thetab}}$ is the number of \emph{expected} $y's$ which are $\pm 1$ under the model $\pi_{\thetab}$ and the corresponding $\xv$ is an element of $\Xspace_g.$
where $O_{g,\pm1}$ is the number of \emph{observed} $y's$ which are $\pm 1$ and the corresponding $\xv$ is an element of $\Xspace_g,$ and $E_{g,\pm 1|{\thetav}}$ is the number of \emph{expected} $y's$ which are $\pm 1$ under the model $\pi_{\thetav}$ and the corresponding $\xv$ is an element of $\Xspace_g.$
%
\begin{itemize}
%
\item [(a)] Give a mathematical definition of $ O_{g,+1},$ $ O_{g,-1},$ $E_{g,+1|{\thetab}}$ and $E_{g,-1|{\thetab}}.$
\item [(a)] Give a mathematical definition of $ O_{g,+1},$ $ O_{g,-1},$ $E_{g,+1|{\thetav}}$ and $E_{g,-1|{\thetav}}.$
%
\item [(b)] If the model $\pi_{\thetab}$ is (approximately) well-calibrated, what values should $H_{\thetab}(G)$ take? What is a desirable property of the partition $\Xspace_1,\ldots,\Xspace_G$ of $\Xspace?$
\item [(b)] If the model $\pi_{\thetav}$ is (approximately) well-calibrated, what values should $H_{\thetav}(G)$ take? What is a desirable property of the partition $\Xspace_1,\ldots,\Xspace_G$ of $\Xspace?$
%
\item [(c)] Generate a data set $\D $ with $\Xspace = \R$ of size $N=100$ in the following way:
%
@@ -93,9 +93,9 @@ where $O_{g,\pm1}$ is the number of \emph{observed} $y's$ which are $\pm 1$ and
%%
% \begin{align*}
% %
% \pi_{\thetab}^C: \ &\Xspace \to \Yspace \\
% \pi_{\thetav}^C: \ &\Xspace \to \Yspace \\
% %
% &\xv \mapsto \frac{1}{1 + \exp \big(- C(\langle \thetab , \xv \rangle )\big) },
% &\xv \mapsto \frac{1}{1 + \exp \big(- C(\langle \thetav , \xv \rangle )\big) },
% %
% \end{align*}
%%
34 changes: 17 additions & 17 deletions exercises/fairness/ex_rnw/sol_fairness_3.Rnw
@@ -10,36 +10,36 @@ Recall that in logistic regression the probability $ p(y= +1\,\vert\,\xv)$ is m
%
\begin{align*}
%
\pi_{\thetab}: \ \Xspace & \to [0,1] \\
\pi_{\thetav}: \ \Xspace & \to [0,1] \\
%
\xv &\mapsto \frac{1}{1 + \exp \big(- \langle \thetab , \xv \rangle \big) } \, ,
\xv &\mapsto \frac{1}{1 + \exp \big(- \langle \thetav , \xv \rangle \big) } \, ,
%
\end{align*}
%
with $\thetab = (\theta_1, \ldots , \theta_d)^\top \in \mathbb{R}^d$ is a parameter vector.
with $\thetav = (\theta_1, \ldots , \theta_d)^\top \in \mathbb{R}^d$ is a parameter vector.

Assume we have a partition of $\Xspace$ into $G\in \mathbb{N}$ groups\footnote{Here, we mean disjoint subsets of $\mathbb{R}^d$ whose union is $\Xspace = \mathbb{R}^d.$}, say $\Xspace_1,\ldots,\Xspace_G.$
%
Consider the following quantity
%
\begin{align*}
%
H_{\thetab}(G) = \sum_{g=1}^G \frac{( O_{g,+1} - E_{g,+1|{\thetab}})^2 }{E_{g,+1|{\thetab}}} + \frac{( O_{g,-1} - E_{g,-1|{\thetab}})^2 }{E_{g,-1|{\thetab}}},
H_{\thetav}(G) = \sum_{g=1}^G \frac{( O_{g,+1} - E_{g,+1|{\thetav}})^2 }{E_{g,+1|{\thetav}}} + \frac{( O_{g,-1} - E_{g,-1|{\thetav}})^2 }{E_{g,-1|{\thetav}}},
%
\end{align*}
%
where $O_{g,\pm1}$ is the number of \emph{observed} $y's$ which are $\pm 1$ and the corresponding $\xv$ is an element of $\Xspace_g,$ and $E_{g,\pm 1|{\thetab}}$ is the number of \emph{expected} $y's$ which are $\pm 1$ under the model $\pi_{\thetab}$ and the corresponding $\xv$ is an element of $\Xspace_g.$
where $O_{g,\pm1}$ is the number of \emph{observed} $y's$ which are $\pm 1$ and the corresponding $\xv$ is an element of $\Xspace_g,$ and $E_{g,\pm 1|{\thetav}}$ is the number of \emph{expected} $y's$ which are $\pm 1$ under the model $\pi_{\thetav}$ and the corresponding $\xv$ is an element of $\Xspace_g.$
%
\begin{itemize}
%
\item [(a)] Give a mathematical definition of $ O_{g,+1},$ $ O_{g,-1},$ $E_{g,+1|{\thetab}}$ and $E_{g,-1|{\thetab}}.$
\item [(a)] Give a mathematical definition of $ O_{g,+1},$ $ O_{g,-1},$ $E_{g,+1|{\thetav}}$ and $E_{g,-1|{\thetav}}.$

\textbf{Solution:} %
Recall the interpretation of $\pi_{\thetab}(\xv):$ It is giving the (fitted) probability that $y=+1$ for given $\xv.$
Recall the interpretation of $\pi_{\thetav}(\xv):$ It is giving the (fitted) probability that $y=+1$ for given $\xv.$
%
In other words, we expect that $y=+1$ given $\xv$ with probability $\pi_{\thetab}(\xv).$
In other words, we expect that $y=+1$ given $\xv$ with probability $\pi_{\thetav}(\xv).$
%
Similarly, we expect that $y=-1$ given $\xv$ with probability $(1-\pi_{\thetab}(\xv)).$
Similarly, we expect that $y=-1$ given $\xv$ with probability $(1-\pi_{\thetav}(\xv)).$
%
With this, we can write the quantities as follows:
%
@@ -49,29 +49,29 @@ where $O_{g,\pm1}$ is the number of \emph{observed} $y's$ which are $\pm 1$ and
%
O_{g,-1} &= \sum_{i=1}^N \mathds{1}_{[ \yi = -1 ]} \mathds{1}_{[ \xi \in \Xspace_g ]}, \\
%
E_{g,+1|{\thetab}} &= \sum_{i=1}^N \pi_{\thetab}(\xi) \mathds{1}_{[ \xi \in \Xspace_g ]},\\
E_{g,+1|{\thetav}} &= \sum_{i=1}^N \pi_{\thetav}(\xi) \mathds{1}_{[ \xi \in \Xspace_g ]},\\
%
E_{g,-1|{\thetab}} &= \sum_{i=1}^N (1-\pi_{\thetab}(\xi)) \mathds{1}_{[ \xi \in \Xspace_g ]}.
E_{g,-1|{\thetav}} &= \sum_{i=1}^N (1-\pi_{\thetav}(\xi)) \mathds{1}_{[ \xi \in \Xspace_g ]}.
%
\end{align*}
%
%
\item [(b)] If the model $\pi_{\thetab}$ is (approximately) well-calibrated, what values should $H_{\thetab}(G)$ take? What is a desirable property of the partition $\Xspace_1,\ldots,\Xspace_G$ of $\Xspace?$
\item [(b)] If the model $\pi_{\thetav}$ is (approximately) well-calibrated, what values should $H_{\thetav}(G)$ take? What is a desirable property of the partition $\Xspace_1,\ldots,\Xspace_G$ of $\Xspace?$

\textbf{Solution:}

%
If the model is approximately well-calibrated, then it should hold for any $s\in[0,1]$
%
$$ \P(y = +1\,\vert\, \pi_{\thetab}(\xv) = s ) \approx s, \quad \forall \xv\in \Xspace.$$
$$ \P(y = +1\,\vert\, \pi_{\thetav}(\xv) = s ) \approx s, \quad \forall \xv\in \Xspace.$$
%
In words, if the logistic model predicts that $y = +1$ will occur with probability $s = \pi_{\thetab}(\xv),$ then $y = +1$ should occur with probability (approximately) $s.$
In words, if the logistic model predicts that $y = +1$ will occur with probability $s = \pi_{\thetav}(\xv),$ then $y = +1$ should occur with probability (approximately) $s.$
%
In particular, the frequency with which $y = +1$ is observed for some particular $\xv$ should match the expected frequency under $\pi_{\thetab}(\xv).$
In particular, the frequency with which $y = +1$ is observed for some particular $\xv$ should match the expected frequency under $\pi_{\thetav}(\xv).$

Thus, we would expect that $ O_{g,+1} \approx E_{g,+1|{\thetab}}$ and $ O_{g,-1} \approx E_{g,-1|{\thetab}}$ for every group $g \in\{1,\ldots,G\},$ if the model $\pi_{\thetab}$ is approximately well-calibrated.
Thus, we would expect that $ O_{g,+1} \approx E_{g,+1|{\thetav}}$ and $ O_{g,-1} \approx E_{g,-1|{\thetav}}$ for every group $g \in\{1,\ldots,G\},$ if the model $\pi_{\thetav}$ is approximately well-calibrated.
%
Hence, $H_{\thetab}(G)$ should be close to 0 or not ``too large''.
Hence, $H_{\thetav}(G)$ should be close to 0 or not ``too large''.
%

However, as we rarely observe the same exact feature vector $\xv$ we could group some of them together.
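The definitions in part (a) translate directly into code; $H_{\thetav}(G)$ is essentially a Hosmer-Lemeshow-type goodness-of-fit statistic. A minimal R sketch under assumed, illustrative inputs: `p` are the fitted probabilities $\pi_{\thetav}(\xi)$, `y` the observed labels in $\{-1, +1\}$, and `group` a factor with $G$ levels assigning each observation to $\Xspace_1, \ldots, \Xspace_G$ (all names are placeholders, not part of the exercise).

H_statistic <- function(p, y, group) {
  O_pos <- tapply(y == +1, group, sum)  # observed +1 counts per group
  O_neg <- tapply(y == -1, group, sum)  # observed -1 counts per group
  E_pos <- tapply(p,       group, sum)  # expected +1 counts per group
  E_neg <- tapply(1 - p,   group, sum)  # expected -1 counts per group
  sum((O_pos - E_pos)^2 / E_pos + (O_neg - E_neg)^2 / E_neg)
}

# Illustrative usage in the spirit of part (c): simulate N = 100 points,
# fit a logistic regression, and group observations by fitted probability.
set.seed(1)
x <- runif(100, -3, 3)
y <- ifelse(runif(100) < plogis(1.5 * x), +1, -1)
fit <- glm((y + 1) / 2 ~ x, family = binomial)  # response recoded to {0, 1}
p_hat <- fitted(fit)
group <- cut(p_hat, quantile(p_hat, 0:10 / 10), include.lowest = TRUE)
H_statistic(p_hat, y, group)  # small value suggests good calibration

Grouping by quantiles of the fitted probabilities keeps the expected counts per group away from zero, which is one desirable property of the partition asked about in part (b).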
22 changes: 11 additions & 11 deletions exercises/gaussian-processes/ex_rnw/ex_gp_1_22.Rnw
@@ -2,7 +2,7 @@
In the Bayesian linear model, we assume that the data follows the following law:
%
$$
y = \fx + \epsilon = \thetab^T \xv + \epsilon ,
y = \fx + \epsilon = \thetav^T \xv + \epsilon ,
$$
%
where $\varepsilon \sim \mathcal{N}(0,\sigma^2)$ and independent of $\xv.$
@@ -11,35 +11,35 @@ On the data-level this corresponds to
%
\begin{eqnarray*}
%
\yi &=& \fxi + \epsi = \thetab^T \xi + \epsi, \quad \text{for } i \in \{1, \ldots, n\}
\yi &=& \fxi + \epsi = \thetav^T \xi + \epsi, \quad \text{for } i \in \{1, \ldots, n\}
%
\end{eqnarray*}
%
where $\epsi \sim \mathcal{N}(0, \sigma^2)$ are iid and all independent of the $\xi$'s.
%
In the Bayesian perspective it is assumed that the parameter vector $\thetab$ is stochastic and follows a distribution.
In the Bayesian perspective it is assumed that the parameter vector $\thetav$ is stochastic and follows a distribution.

Assume we are interested in the so-called maximum a posteriori estimate of $\thetab,$ which is defined by
Assume we are interested in the so-called maximum a posteriori estimate of $\thetav,$ which is defined by
%
$$ \thetabh = \argmax_{\thetab} p(\thetab | \Xmat, \yv). $$
$$ \thetavh = \argmax_{\thetav} p(\thetav | \Xmat, \yv). $$
%
\begin{enumerate}
%
\item Show that if we choose a uniform distribution over the parameter vectors $\thetab$ as the prior belief, i.e.,
\item Show that if we choose a uniform distribution over the parameter vectors $\thetav$ as the prior belief, i.e.,
%
$$ q(\thetab) \propto 1, $$
$$ q(\thetav) \propto 1, $$
%
then the maximum a posteriori estimate coincides with the empirical risk minimizer for the L2-loss (over the linear models).
%
\item Show that if we choose a Gaussian distribution over the parameter vectors $\thetab$ as the prior belief, i.e.,
\item Show that if we choose a Gaussian distribution over the parameter vectors $\thetav$ as the prior belief, i.e.,
%
$$ q(\thetab) \propto \exp\biggl[-\frac{1}{2\tau^2}\thetab^\top\thetab\biggr], \qquad \tau>0, $$
$$ q(\thetav) \propto \exp\biggl[-\frac{1}{2\tau^2}\thetav^\top\thetav\biggr], \qquad \tau>0, $$
%
then the maximum a posteriori estimate coincides for a specific choice of $\tau$ with the regularized empirical risk minimizer for the L2-loss with L2 penalty (over the linear models), i.e., the Ridge regression.
%
\item Show that if we choose a Laplace distribution over the parameter vectors $\thetab$ as the prior belief, i.e.,
\item Show that if we choose a Laplace distribution over the parameter vectors $\thetav$ as the prior belief, i.e.,
%
$$ q(\thetab) \propto \exp\biggl[-\frac{\sum_{i=1}^p |\thetab_i|}{\tau} \biggr], \qquad \tau>0, $$
$$ q(\thetav) \propto \exp\biggl[-\frac{\sum_{i=1}^p |\thetav_i|}{\tau} \biggr], \qquad \tau>0, $$
%
then the maximum a posteriori estimate coincides for a specific choice of $\tau$ with the regularized empirical risk minimizer for the L2-loss with L1 penalty (over the linear models), i.e., the Lasso regression.
\end{enumerate}
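All three parts of this exercise run through the same step, which may be worth sketching once: by Bayes' rule, $p(\thetav | \Xmat, \yv) \propto p(\yv | \Xmat, \thetav)\, q(\thetav)$, and maximizing the posterior is equivalent to minimizing its negative logarithm. With the Gaussian noise assumption above, the log-likelihood contributes the L2 empirical risk (here $\lambda$ denotes the induced regularization constant; it is not part of the exercise's notation):

$$ \thetavh = \argmax_{\thetav} p(\yv | \Xmat, \thetav)\, q(\thetav) = \argmin_{\thetav} \left[ \frac{1}{2\sigma^2} \sum_{i=1}^n \left( \yi - \thetav^T \xi \right)^2 - \log q(\thetav) \right]. $$

A uniform prior makes $-\log q(\thetav)$ constant, leaving the plain empirical risk minimizer for the L2 loss (part (a)); a Gaussian prior contributes $\frac{1}{2\tau^2}\thetav^\top\thetav$, i.e., Ridge regression with $\lambda = \sigma^2/\tau^2$ (part (b)); a Laplace prior contributes $\frac{1}{\tau}\sum_{i=1}^p |\theta_i|$, i.e., the Lasso with $\lambda = 2\sigma^2/\tau$ (part (c)).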