Merge pull request #35 from slds-lmu/use-service-components
jemus42 authored Oct 25, 2024
2 parents fef9088 + 1b8bd66 commit e1692a4
Showing 30 changed files with 1,047 additions and 526 deletions.
19 changes: 19 additions & 0 deletions .editorconfig
@@ -0,0 +1,19 @@
# See https://editorconfig.org/

# top-most EditorConfig file
root = true

# Unix-style newlines with a newline ending every file
[*]
end_of_line = lf
insert_final_newline = true

# Matches multiple files with brace expansion notation
# 2 space indentation
[*.{tex,sty,R,r}]
indent_style = space
indent_size = 2

# Tab indentation (no size specified)
[Makefile]
indent_style = tab
2 changes: 2 additions & 0 deletions .gitattributes
@@ -0,0 +1,2 @@
* text=auto eol=lf

2 changes: 1 addition & 1 deletion .github/workflows/pr-slide-check.yaml
@@ -70,7 +70,7 @@ jobs:
run: |
echo "dir=$(Rscript --quiet -e 'cat(.libPaths()[[1]])')" >> $GITHUB_OUTPUT
- name: Restore R package cache
uses: actions/cache@v3
uses: actions/cache@v4
with:
path: ${{ steps.r-cache.outputs.dir }}
key: ${{ runner.os }}-r-${{inputs.cache-version }}-${{ hashFiles('scripts/install_r_deps.R') }}
113 changes: 68 additions & 45 deletions .gitignore
@@ -1,3 +1,38 @@
#----------------------------------------------------#
# ----- Global gitignore file for all lectures ----- #
# This file is included in slds-lmu/lecture_service #
#----------------------------------------------------#


#--------------------------------------------#
# Things that need to exist but not in git #
#--------------------------------------------#

nospeakermargin.tex
speakermargin.tex

#--------------------------------------------#
# Intermediate output (slides, exercises) #
#--------------------------------------------#

# The only slide PDF output in this repo should be in slides-pdf
# include both slides-xyz (i2ml) and slideXY- / tXY- (iml) formats
slides/*/slides*.pdf
slides/*/t*.pdf

# Similar things apply to exercises and exercises-pdf
exercises/*/*.pdf
exercises/*/*.tex
exercises/*/*.log
exercises/*/figure/unnamed-*.pdf
exercises/*/figure/unnamed-chunk*
exercises/*/*_files/
exercises/*/.ipynb_checkpoints/

#-----------------------------------------------------------------------------#
# TeX intermediate stuff everybody loves to hate and hates to commit to git #
#-----------------------------------------------------------------------------#

*.aux
*.fdb_latexmk
*.fls
@@ -8,51 +8,43 @@
*.toc
*.vrb
*.synctex.gz
*.DS_Store
.Rbuildignore
*.Rproj
*.Rhistory
.Rproj.user
.Rproj.user/
cache/
docs/
exercises/*/*.pdf
!exercises/svm/figure/eps_tubes.pdf
exercises/*/*.tex
exercises/*/*.log
exercises/*/figure/unnamed-*.pdf
exercises/*/figure/unnamed-chunk*
tut_*.pdf
tut_*.tex
# latex-math
slides/advriskmin/slides-*.pdf
slides/online-learning/slides-*.pdf
slides/multitarget/slides-*.pdf
slides/imbalanced-learning/slides-*.pdf
slides/fairness/slides-*.pdf
slides/cod/slides-*.pdf
slides/evaluation/slides-*.pdf
slides/forests/slides-*.pdf
slides/gaussian-processes/slides-*.pdf
slides/hypospaces-capacity/slides-*.pdf
slides/information-theory/slides-*.pdf
slides/knn/slides-*.pdf
slides/linear-svm/slides-*.pdf
slides/mathrefresher/slides-*.pdf
slides/ml-basics/slides-*.pdf
slides/ml-philosophy/slides-*.pdf
slides/mlr3/slides-*.pdf
slides/multiclass/slides-*.pdf
slides/nested-resampling/slides-*.pdf
slides/nonlinear-svm/slides-*.pdf
slides/regularization/slides-*.pdf
slides/supervised-classification/slides-*.pdf
slides/supervised-regression/slides-*.pdf
slides/trees/slides-*.pdf
slides/tuning/slides-*.pdf
slides/boosting/slides-*.pdf
*.synctex(busy)
**.pax # pdfannotextractor / pax
*.pax
**.bbl
**.blg
**.bcf
**.bcf-SAVE-ERROR
**.run.xml

#----------------------------------------------------------#
# Editor-specific stuff that should generally be ignored #
#----------------------------------------------------------#

# vim swap files
*.swp
# http://stratus3d.com/blog/2018/06/03/stop-excluding-editor-temp-files-in-gitignore/
[._]*.s[a-v][a-z]
[._]*.sw[a-p]
[._]s[a-v][a-z]
[._]sw[a-p]

# used for atom editor
.latexcfg
# Xournal files
@@ -61,5 +71,18 @@ slides/boosting/slides-*.pdf
code-demos/code_demo_genclass.R
code-demos/code_demo_knn.R
code-demos/code_demo_limo.R
NAMESPACE
.idea/*
*.pkl

# RStudio / R in general
*.Rproj
*.Rhistory
.Rproj.user
.RData
.Rdata

#-----------------------------------#
# OS-specific temp/preview files #
#-----------------------------------#
*.DS_Store

4 changes: 4 additions & 0 deletions .ignore
@@ -0,0 +1,4 @@
# Complementary to .gitignore, this file also affects tools like ripgrep
slides/attic/*
slides/*/attic
attic
2 changes: 1 addition & 1 deletion exercises/aml-test-exam/ex_rnw/ex_imbalanced_learning.Rnw
@@ -22,7 +22,7 @@ p

\begin{enumerate}
%
\item Let $f(\xv) = 2 \cdot \mathds{1}_{[ \thetab^\top \xv \ge 3]} -1 $ with $\thetab=(1,1)^\top.$ Specify the confusion matrix of $f$ and compute its accuracy as well as its $F_1$ score.
\item Let $f(\xv) = 2 \cdot \mathds{1}_{[ \thetav^\top \xv \ge 3]} -1 $ with $\thetav=(1,1)^\top.$ Specify the confusion matrix of $f$ and compute its accuracy as well as its $F_1$ score.
%
\item Find all Tomek links with respect to the Euclidean distance on $\Xspace$ in the data set and remove the instances in each Tomek link belonging to the majority class. Repeat the computation in (a).
%
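The Tomek-link computation asked for in part (b) of this exercise is mechanical, and a small script may make it concrete. Below is a minimal R sketch under stated assumptions: the exercise's 13 data points are not reproduced in this hunk, so `df` is placeholder data of the same shape (features `x1`, `x2` and labels `y` in $\{-1, +1\}$). Two observations form a Tomek link if they carry different labels and are mutual nearest neighbors under the Euclidean distance.

# Minimal sketch: find Tomek links. `df` is placeholder data;
# substitute the exercise's actual 13 points.
df <- data.frame(
  x1 = c(0, 2, 2, 3, 5),
  x2 = c(1, 1, 2, 3, 5),
  y  = c(-1, -1, +1, +1, +1)
)

D <- as.matrix(dist(df[, c("x1", "x2")]))  # pairwise Euclidean distances
diag(D) <- Inf                             # a point is not its own neighbor
nn <- apply(D, 1, which.min)               # nearest-neighbor index per point

# i forms a Tomek link with nn[i] iff they are mutual nearest neighbors
# and belong to different classes.
is_link <- nn[nn] == seq_len(nrow(df)) & df$y != df$y[nn]
links <- unique(t(apply(cbind(which(is_link), nn[is_link]), 1, sort)))
links  # each row is one Tomek link; drop its majority-class member

Removing the majority-class instance of every such pair and recomputing the metrics is then exactly the comparison the exercise asks for.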
4 changes: 2 additions & 2 deletions exercises/aml-test-exam/ex_rnw/sol_imbalanced_learning.Rnw
@@ -22,15 +22,15 @@ The following data set contains $n = 13$ datapoints, the triangles denote class

\begin{enumerate}
%
\item Let $f(\xv) = 2 \cdot \mathds{1}_{[ \thetab^\top \xv \ge 3]} -1 $ with $\thetab=(1,1)^\top.$ Specify the confusion matrix of $f$ and compute its accuracy as well as its $F_1$ score.
\item Let $f(\xv) = 2 \cdot \mathds{1}_{[ \thetav^\top \xv \ge 3]} -1 $ with $\thetav=(1,1)^\top.$ Specify the confusion matrix of $f$ and compute its accuracy as well as its $F_1$ score.

\item[] {\color{blue} \textbf{Solution:}
%
The decision boundary of the classifier is specified by
%
\begin{align*}
%
\thetab^\top \xv = 3 \quad &\Leftrightarrow \quad x_1+x_2 = 3 \quad \Leftrightarrow \quad x_2 = 3 - x_1.
\thetav^\top \xv = 3 \quad &\Leftrightarrow \quad x_1+x_2 = 3 \quad \Leftrightarrow \quad x_2 = 3 - x_1.
%
\end{align*}
%
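Part (a) of this solution is also easy to verify programmatically. A minimal R sketch, again with placeholder `X` and `y` standing in for the 13 points shown in the exercise's figure:

# Sketch: evaluate f(x) = 2 * 1[theta^T x >= 3] - 1 with theta = (1, 1)^T.
# X and y are placeholders; substitute the actual data.
X <- rbind(c(0, 1), c(1, 1), c(2, 2), c(3, 1), c(1, 3))
y <- c(-1, +1, +1, +1, +1)
theta <- c(1, 1)

pred <- ifelse(drop(X %*% theta) >= 3, +1, -1)

table(truth = y, prediction = pred)  # confusion matrix
mean(pred == y)                      # accuracy

tp <- sum(pred == +1 & y == +1)
fp <- sum(pred == +1 & y == -1)
fn <- sum(pred == -1 & y == +1)
2 * tp / (2 * tp + fp + fn)          # F1 score

All points on or above the line $x_2 = 3 - x_1$ are predicted $+1$, matching the boundary derived above.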
18 changes: 9 additions & 9 deletions exercises/fairness/ex_rnw/ex_fairness_3.Rnw
@@ -10,31 +10,31 @@ Recall that in logistic regression the probability $ p(y= +1\,\vert\,\xv)$ is m
%
\begin{align*}
%
\pi_{\thetab}: \ \Xspace & \to [0,1] \\
\pi_{\thetav}: \ \Xspace & \to [0,1] \\
%
\xv &\mapsto \frac{1}{1 + \exp \big(- \langle \thetab , \xv \rangle \big) } \, ,
\xv &\mapsto \frac{1}{1 + \exp \big(- \langle \thetav , \xv \rangle \big) } \, ,
%
\end{align*}
%
with $\thetab = (\theta_1, \ldots , \theta_d)^\top \in \mathbb{R}^d$ is a parameter vector.
with $\thetav = (\theta_1, \ldots , \theta_d)^\top \in \mathbb{R}^d$ is a parameter vector.

Assume we have a partition of $\Xspace$ into $G\in \mathbb{N}$ groups\footnote{Here, we mean disjoint subsets of $\mathbb{R}^d$ whose union is $\Xspace = \mathbb{R}^d.$}, say $\Xspace_1,\ldots,\Xspace_G.$
%
Consider the following quantity
%
\begin{align*}
%
H_{\thetab}(G) = \sum_{g=1}^G \frac{( O_{g,+1} - E_{g,+1|{\thetab}})^2 }{E_{g,+1|{\thetab}}} + \frac{( O_{g,-1} - E_{g,-1|{\thetab}})^2 }{E_{g,-1|{\thetab}}},
H_{\thetav}(G) = \sum_{g=1}^G \frac{( O_{g,+1} - E_{g,+1|{\thetav}})^2 }{E_{g,+1|{\thetav}}} + \frac{( O_{g,-1} - E_{g,-1|{\thetav}})^2 }{E_{g,-1|{\thetav}}},
%
\end{align*}
%
where $O_{g,\pm1}$ is the number of \emph{observed} $y's$ which are $\pm 1$ and the corresponding $\xv$ is an element of $\Xspace_g,$ and $E_{g,\pm 1|{\thetab}}$ is the number of \emph{expected} $y's$ which are $\pm 1$ under the model $\pi_{\thetab}$ and the corresponding $\xv$ is an element of $\Xspace_g.$
where $O_{g,\pm1}$ is the number of \emph{observed} $y's$ which are $\pm 1$ and the corresponding $\xv$ is an element of $\Xspace_g,$ and $E_{g,\pm 1|{\thetav}}$ is the number of \emph{expected} $y's$ which are $\pm 1$ under the model $\pi_{\thetav}$ and the corresponding $\xv$ is an element of $\Xspace_g.$
%
\begin{itemize}
%
\item [(a)] Give a mathematical definition of $ O_{g,+1},$ $ O_{g,-1},$ $E_{g,+1|{\thetab}}$ and $E_{g,-1|{\thetab}}.$
\item [(a)] Give a mathematical definition of $ O_{g,+1},$ $ O_{g,-1},$ $E_{g,+1|{\thetav}}$ and $E_{g,-1|{\thetav}}.$
%
\item [(b)] If the model $\pi_{\thetab}$ is (approximately) well-calibrated, what values should $H_{\thetab}(G)$ take? What is a desirable property of the partition $\Xspace_1,\ldots,\Xspace_G$ of $\Xspace?$
\item [(b)] If the model $\pi_{\thetav}$ is (approximately) well-calibrated, what values should $H_{\thetav}(G)$ take? What is a desirable property of the partition $\Xspace_1,\ldots,\Xspace_G$ of $\Xspace?$
%
\item [(c)] Generate a data set $\D $ with $\Xspace = \R$ of size $N=100$ in the following way:
%
@@ -93,9 +93,9 @@ where $O_{g,\pm1}$ is the number of \emph{observed} $y's$ which are $\pm 1$ and
%%
% \begin{align*}
% %
% \pi_{\thetab}^C: \ &\Xspace \to \Yspace \\
% \pi_{\thetav}^C: \ &\Xspace \to \Yspace \\
% %
% &\xv \mapsto \frac{1}{1 + \exp \big(- C(\langle \thetab , \xv \rangle )\big) },
% &\xv \mapsto \frac{1}{1 + \exp \big(- C(\langle \thetav , \xv \rangle )\big) },
% %
% \end{align*}
%%
34 changes: 17 additions & 17 deletions exercises/fairness/ex_rnw/sol_fairness_3.Rnw
@@ -10,36 +10,36 @@ Recall that in logistic regression the probability $ p(y= +1\,\vert\,\xv)$ is m
%
\begin{align*}
%
\pi_{\thetab}: \ \Xspace & \to [0,1] \\
\pi_{\thetav}: \ \Xspace & \to [0,1] \\
%
\xv &\mapsto \frac{1}{1 + \exp \big(- \langle \thetab , \xv \rangle \big) } \, ,
\xv &\mapsto \frac{1}{1 + \exp \big(- \langle \thetav , \xv \rangle \big) } \, ,
%
\end{align*}
%
with $\thetab = (\theta_1, \ldots , \theta_d)^\top \in \mathbb{R}^d$ is a parameter vector.
with $\thetav = (\theta_1, \ldots , \theta_d)^\top \in \mathbb{R}^d$ is a parameter vector.

Assume we have a partition of $\Xspace$ into $G\in \mathbb{N}$ groups\footnote{Here, we mean disjoint subsets of $\mathbb{R}^d$ whose union is $\Xspace = \mathbb{R}^d.$}, say $\Xspace_1,\ldots,\Xspace_G.$
%
Consider the following quantity
%
\begin{align*}
%
H_{\thetab}(G) = \sum_{g=1}^G \frac{( O_{g,+1} - E_{g,+1|{\thetab}})^2 }{E_{g,+1|{\thetab}}} + \frac{( O_{g,-1} - E_{g,-1|{\thetab}})^2 }{E_{g,-1|{\thetab}}},
H_{\thetav}(G) = \sum_{g=1}^G \frac{( O_{g,+1} - E_{g,+1|{\thetav}})^2 }{E_{g,+1|{\thetav}}} + \frac{( O_{g,-1} - E_{g,-1|{\thetav}})^2 }{E_{g,-1|{\thetav}}},
%
\end{align*}
%
where $O_{g,\pm1}$ is the number of \emph{observed} $y's$ which are $\pm 1$ and the corresponding $\xv$ is an element of $\Xspace_g,$ and $E_{g,\pm 1|{\thetab}}$ is the number of \emph{expected} $y's$ which are $\pm 1$ under the model $\pi_{\thetab}$ and the corresponding $\xv$ is an element of $\Xspace_g.$
where $O_{g,\pm1}$ is the number of \emph{observed} $y's$ which are $\pm 1$ and the corresponding $\xv$ is an element of $\Xspace_g,$ and $E_{g,\pm 1|{\thetav}}$ is the number of \emph{expected} $y's$ which are $\pm 1$ under the model $\pi_{\thetav}$ and the corresponding $\xv$ is an element of $\Xspace_g.$
%
\begin{itemize}
%
\item [(a)] Give a mathematical definition of $ O_{g,+1},$ $ O_{g,-1},$ $E_{g,+1|{\thetab}}$ and $E_{g,-1|{\thetab}}.$
\item [(a)] Give a mathematical definition of $ O_{g,+1},$ $ O_{g,-1},$ $E_{g,+1|{\thetav}}$ and $E_{g,-1|{\thetav}}.$

\textbf{Solution:} %
Recall the interpretation of $\pi_{\thetab}(\xv):$ It is giving the (fitted) probability that $y=+1$ for given $\xv.$
Recall the interpretation of $\pi_{\thetav}(\xv):$ It is giving the (fitted) probability that $y=+1$ for given $\xv.$
%
In other words, we expect that $y=+1$ given $\xv$ with probability $\pi_{\thetab}(\xv).$
In other words, we expect that $y=+1$ given $\xv$ with probability $\pi_{\thetav}(\xv).$
%
Similarly, we expect that $y=-1$ given $\xv$ with probability $(1-\pi_{\thetab}(\xv)).$
Similarly, we expect that $y=-1$ given $\xv$ with probability $(1-\pi_{\thetav}(\xv)).$
%
With this, we can write the quantities as follows:
%
@@ -49,29 +49,29 @@ where $O_{g,\pm1}$ is the number of \emph{observed} $y's$ which are $\pm 1$ and
%
O_{g,-1} &= \sum_{i=1}^N \mathds{1}_{[ \yi = -1 ]} \mathds{1}_{[ \xi \in \Xspace_g ]}, \\
%
E_{g,+1|{\thetab}} &= \sum_{i=1}^N \pi_{\thetab}(\xi) \mathds{1}_{[ \xi \in \Xspace_g ]},\\
E_{g,+1|{\thetav}} &= \sum_{i=1}^N \pi_{\thetav}(\xi) \mathds{1}_{[ \xi \in \Xspace_g ]},\\
%
E_{g,-1|{\thetab}} &= \sum_{i=1}^N (1-\pi_{\thetab}(\xi)) \mathds{1}_{[ \xi \in \Xspace_g ]}.
E_{g,-1|{\thetav}} &= \sum_{i=1}^N (1-\pi_{\thetav}(\xi)) \mathds{1}_{[ \xi \in \Xspace_g ]}.
%
\end{align*}
%
%
\item [(b)] If the model $\pi_{\thetab}$ is (approximately) well-calibrated, what values should $H_{\thetab}(G)$ take? What is a desirable property of the partition $\Xspace_1,\ldots,\Xspace_G$ of $\Xspace?$
\item [(b)] If the model $\pi_{\thetav}$ is (approximately) well-calibrated, what values should $H_{\thetav}(G)$ take? What is a desirable property of the partition $\Xspace_1,\ldots,\Xspace_G$ of $\Xspace?$

\textbf{Solution:}

%
If the model is approximately well-calibrated, then it should hold for any $s\in[0,1]$
%
$$ \P(y = +1\,\vert\, \pi_{\thetab}(\xv) = s ) \approx s, \quad \forall \xv\in \Xspace.$$
$$ \P(y = +1\,\vert\, \pi_{\thetav}(\xv) = s ) \approx s, \quad \forall \xv\in \Xspace.$$
%
In words, if the logistic model predicts that $y = +1$ will occur with probability $s = \pi_{\thetab}(\xv),$ then $y = +1$ should occur with probability (approximately) $s.$
In words, if the logistic model predicts that $y = +1$ will occur with probability $s = \pi_{\thetav}(\xv),$ then $y = +1$ should occur with probability (approximately) $s.$
%
In particular, the frequency with which $y = +1$ is observed for some particular $\xv$ should match the expected frequency under $\pi_{\thetab}(\xv).$
In particular, the frequency with which $y = +1$ is observed for some particular $\xv$ should match the expected frequency under $\pi_{\thetav}(\xv).$

Thus, we would expect that $ O_{g,+1} \approx E_{g,+1|{\thetab}}$ and $ O_{g,-1} \approx E_{g,-1|{\thetab}}$ for every group $g \in\{1,\ldots,G\},$ if the model $\pi_{\thetab}$ is approximately well-calibrated.
Thus, we would expect that $ O_{g,+1} \approx E_{g,+1|{\thetav}}$ and $ O_{g,-1} \approx E_{g,-1|{\thetav}}$ for every group $g \in\{1,\ldots,G\},$ if the model $\pi_{\thetav}$ is approximately well-calibrated.
%
Hence, $H_{\thetab}(G)$ should be close to 0 or not ``too large''.
Hence, $H_{\thetav}(G)$ should be close to 0 or not ``too large''.
%

However, as we rarely observe the same exact feature vector $\xv$ we could group some of them together.
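The definitions in part (a) translate directly into code; $H_{\thetav}(G)$ is essentially a Hosmer-Lemeshow-type goodness-of-fit statistic. A minimal R sketch under assumed, illustrative inputs: `p` are the fitted probabilities $\pi_{\thetav}(\xi)$, `y` the observed labels in $\{-1, +1\}$, and `group` a factor with $G$ levels assigning each observation to $\Xspace_1, \ldots, \Xspace_G$ (all names are placeholders, not part of the exercise).

H_statistic <- function(p, y, group) {
  O_pos <- tapply(y == +1, group, sum)  # observed +1 counts per group
  O_neg <- tapply(y == -1, group, sum)  # observed -1 counts per group
  E_pos <- tapply(p,       group, sum)  # expected +1 counts per group
  E_neg <- tapply(1 - p,   group, sum)  # expected -1 counts per group
  sum((O_pos - E_pos)^2 / E_pos + (O_neg - E_neg)^2 / E_neg)
}

# Illustrative usage in the spirit of part (c): simulate N = 100 points,
# fit a logistic regression, and group observations by fitted probability.
set.seed(1)
x <- runif(100, -3, 3)
y <- ifelse(runif(100) < plogis(1.5 * x), +1, -1)
fit <- glm((y + 1) / 2 ~ x, family = binomial)  # response recoded to {0, 1}
p_hat <- fitted(fit)
group <- cut(p_hat, quantile(p_hat, 0:10 / 10), include.lowest = TRUE)
H_statistic(p_hat, y, group)  # small value suggests good calibration

Grouping by quantiles of the fitted probabilities keeps the expected counts per group away from zero, which is one desirable property of the partition asked about in part (b).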
22 changes: 11 additions & 11 deletions exercises/gaussian-processes/ex_rnw/ex_gp_1_22.Rnw
@@ -2,7 +2,7 @@
In the Bayesian linear model, we assume that the data follows the following law:
%
$$
y = \fx + \epsilon = \thetab^T \xv + \epsilon ,
y = \fx + \epsilon = \thetav^T \xv + \epsilon ,
$$
%
where $\varepsilon \sim \mathcal{N}(0,\sigma^2)$ and independent of $\xv.$
@@ -11,35 +11,35 @@ On the data-level this corresponds to
%
\begin{eqnarray*}
%
\yi &=& \fxi + \epsi = \thetab^T \xi + \epsi, \quad \text{for } i \in \{1, \ldots, n\}
\yi &=& \fxi + \epsi = \thetav^T \xi + \epsi, \quad \text{for } i \in \{1, \ldots, n\}
%
\end{eqnarray*}
%
where $\epsi \sim \mathcal{N}(0, \sigma^2)$ are iid and all independent of the $\xi$'s.
%
In the Bayesian perspective it is assumed that the parameter vector $\thetab$ is stochastic and follows a distribution.
In the Bayesian perspective it is assumed that the parameter vector $\thetav$ is stochastic and follows a distribution.

Assume we are interested in the so-called maximum a posteriori estimate of $\thetab,$ which is defined by
Assume we are interested in the so-called maximum a posteriori estimate of $\thetav,$ which is defined by
%
$$ \thetabh = \argmax_{\thetab} p(\thetab | \Xmat, \yv). $$
$$ \thetavh = \argmax_{\thetav} p(\thetav | \Xmat, \yv). $$
%
\begin{enumerate}
%
\item Show that if we choose a uniform distribution over the parameter vectors $\thetab$ as the prior belief, i.e.,
\item Show that if we choose a uniform distribution over the parameter vectors $\thetav$ as the prior belief, i.e.,
%
$$ q(\thetab) \propto 1, $$
$$ q(\thetav) \propto 1, $$
%
then the maximum a posteriori estimate coincides with the empirical risk minimizer for the L2-loss (over the linear models).
%
\item Show that if we choose a Gaussian distribution over the parameter vectors $\thetab$ as the prior belief, i.e.,
\item Show that if we choose a Gaussian distribution over the parameter vectors $\thetav$ as the prior belief, i.e.,
%
$$ q(\thetab) \propto \exp\biggl[-\frac{1}{2\tau^2}\thetab^\top\thetab\biggr], \qquad \tau>0, $$
$$ q(\thetav) \propto \exp\biggl[-\frac{1}{2\tau^2}\thetav^\top\thetav\biggr], \qquad \tau>0, $$
%
then the maximum a posteriori estimate coincides for a specific choice of $\tau$ with the regularized empirical risk minimizer for the L2-loss with L2 penalty (over the linear models), i.e., the Ridge regression.
%
\item Show that if we choose a Laplace distribution over the parameter vectors $\thetab$ as the prior belief, i.e.,
\item Show that if we choose a Laplace distribution over the parameter vectors $\thetav$ as the prior belief, i.e.,
%
$$ q(\thetab) \propto \exp\biggl[-\frac{\sum_{i=1}^p |\thetab_i|}{\tau} \biggr], \qquad \tau>0, $$
$$ q(\thetav) \propto \exp\biggl[-\frac{\sum_{i=1}^p |\thetav_i|}{\tau} \biggr], \qquad \tau>0, $$
%
then the maximum a posteriori estimate coincides for a specific choice of $\tau$ with the regularized empirical risk minimizer for the L2-loss with L1 penalty (over the linear models), i.e., the Lasso regression.
\end{enumerate}
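All three parts of this exercise run through the same step, which may be worth sketching once: by Bayes' rule, $p(\thetav | \Xmat, \yv) \propto p(\yv | \Xmat, \thetav)\, q(\thetav)$, and maximizing the posterior is equivalent to minimizing its negative logarithm. With the Gaussian noise assumption above, the log-likelihood contributes the L2 empirical risk (here $\lambda$ denotes the induced regularization constant; it is not part of the exercise's notation):

$$ \thetavh = \argmax_{\thetav} p(\yv | \Xmat, \thetav)\, q(\thetav) = \argmin_{\thetav} \left[ \frac{1}{2\sigma^2} \sum_{i=1}^n \left( \yi - \thetav^T \xi \right)^2 - \log q(\thetav) \right]. $$

A uniform prior makes $-\log q(\thetav)$ constant, leaving the plain empirical risk minimizer for the L2 loss (part (a)); a Gaussian prior contributes $\frac{1}{2\tau^2}\thetav^\top\thetav$, i.e., Ridge regression with $\lambda = \sigma^2/\tau^2$ (part (b)); a Laplace prior contributes $\frac{1}{\tau}\sum_{i=1}^p |\theta_i|$, i.e., the Lasso with $\lambda = 2\sigma^2/\tau$ (part (c)).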