add cross-entropy plots and cross-entropy kl comparison plot
Tobias-Brock committed Dec 6, 2023
1 parent b8a4b3f commit cc5d1fa
Showing 7 changed files with 260 additions and 0 deletions.
3 binary files not displayed (the PNG figures generated by the scripts below: cross_entropy_plot_1.png, cross_entropy_plot_2.png, kl_ce_comparison.png).
158 changes: 158 additions & 0 deletions slides/information-theory/rsrc/make_cross_entropy_plots.R
@@ -0,0 +1,158 @@
library(ggplot2)
library(gridExtra)
library(extraDistr)

### CREATE CROSS-ENTROPY DECOMPOSITION PLOTS: H(p||q) = H(p) + D_KL(p||q)

set.seed(123)

cross_ent <- function(x, p, q, first) {
  # Pointwise integrands: entropy of p, KL(p||q), and the two equivalent
  # cross-entropy expressions H(p) + D_KL(p||q) and -p * log(q).
  ent_p <- -p * log(p)
  kl <- p * log(p / q)
  cross_ent1 <- ent_p + kl
  cross_ent2 <- -p * log(q)

  data <- data.frame(
    x = x,
    P = p,
    Q = q,
    EntP = ent_p,
    KL = kl,
    Cross_Ent1 = cross_ent1,
    Cross_Ent2 = cross_ent2
  )

  if (first == "normal") {
    # p = N(0,1), q = LP(0,3): integrate KL, entropy, and cross-entropy numerically.
    kl_int <- function(x) {
      p <- dnorm(x, 0, 1)
      q <- dlaplace(x, 0, 3)
      p * log(p / q)
    }
    result <- integrate(kl_int, lower = -20, upper = 20)
    kl <- round(result$value, 2)

    ent_int <- function(x) {
      p <- dnorm(x, 0, 1)
      -p * log(p)
    }
    result <- integrate(ent_int, lower = -20, upper = 20)
    entropy <- round(result$value, 2)

    cross_entropy1 <- entropy + kl

    cross_int <- function(x) {
      p <- dnorm(x, 0, 1)
      q <- dlaplace(x, 0, 3)
      -p * log(q)
    }
    result <- integrate(cross_int, lower = -20, upper = 20)
    cross_entropy2 <- round(result$value, 2)
  } else if (first == "laplace") {
    # p = LP(0,3), q = N(0,1): same quantities with the roles swapped.
    kl_int <- function(x) {
      q <- dnorm(x, 0, 1)
      p <- dlaplace(x, 0, 3)
      p * log(p / q)
    }
    result <- integrate(kl_int, lower = -20, upper = 20)
    kl <- round(result$value, 2)

    ent_int <- function(x) {
      p <- dlaplace(x, 0, 3)
      -p * log(p)
    }
    result <- integrate(ent_int, lower = -20, upper = 20)
    entropy <- round(result$value, 2)

    cross_entropy1 <- entropy + kl

    cross_int <- function(x) {
      q <- dnorm(x, 0, 1)
      p <- dlaplace(x, 0, 3)
      -p * log(q)
    }
    result <- integrate(cross_int, lower = -20, upper = 20)
    cross_entropy2 <- round(result$value, 2)
  }

  plot1 <- ggplot(data, aes(x = x)) +
    geom_line(aes(y = P), color = "blue", linewidth = 1) +
    geom_line(aes(y = Q), color = "red", linewidth = 1) +
    labs(title = "N(0,1) and LP(0,3) Densities", x = "x", y = "Density")

  plot2 <- ggplot(data, aes(x = x)) +
    geom_line(aes(y = EntP), color = "blue", linewidth = 1) +
    geom_line(aes(y = KL), color = "orange", linewidth = 1) +
    geom_ribbon(aes(ymin = KL, ymax = EntP), alpha = 0.2) +
    geom_ribbon(aes(ymin = 0, ymax = KL), alpha = 0.2) +
    labs(
      title = sprintf("H(p) = %g, D_KL(p||q) = %g", entropy, kl),
      x = "x",
      y = "Integrands"
    )

  plot3 <- ggplot(data, aes(x = x)) +
    geom_line(aes(y = Cross_Ent1), color = "darkgreen", linewidth = 1) +
    geom_ribbon(aes(ymin = 0, ymax = Cross_Ent1), alpha = 0.2) +
    labs(
      title = sprintf("H(p||q) = %g + %g = %g", entropy, kl, cross_entropy1),
      x = "x",
      y = "Cross-Entropy"
    )

  plot4 <- ggplot(data, aes(x = x)) +
    geom_line(aes(y = Cross_Ent2), color = "darkgreen", linewidth = 1) +
    geom_ribbon(aes(ymin = 0, ymax = Cross_Ent2), alpha = 0.2) +
    labs(
      title = sprintf("H(p||q) = -Int[p(x)*log(q(x))dx] = %g", cross_entropy2),
      x = "x",
      y = "Cross-Entropy"
    )

  plot <- grid.arrange(plot1, plot2, plot3, plot4, ncol = 2)

  return(plot)
}

x <- seq(-4, 4, length.out = 1000)
plot1 <- cross_ent(x, p = dnorm(x, 0, 1), q = dlaplace(x, 0, 3), first = "normal")
plot2 <- cross_ent(x, p = dlaplace(x, 0, 3), q = dnorm(x, 0, 1), first = "laplace")

ggsave("../figure/cross_entropy_plot_1.png", plot = plot1, width = 8, height = 5)
ggsave("../figure/cross_entropy_plot_2.png", plot = plot2, width = 8, height = 5)

67 changes: 67 additions & 0 deletions slides/information-theory/rsrc/make_kl_ce_comparison.R
@@ -0,0 +1,67 @@
library(ggplot2)
library(gridExtra)
library(extraDistr)

set.seed(123)

### CREATE KL AND CROSS-ENTROPY PLOTS FOR VARYING SCALE b OF LP(0,b) AGAINST N(0,1)

b_grid <- seq(0.1, 10, length.out = 1000)
kls <- list()
ces <- list()

# KL(p||q) for p = N(0,1) and q = LP(0,b), evaluated on the grid of scales.
for (i in seq_along(b_grid)) {
  integrand <- function(x) {
    p <- dnorm(x, 0, 1)
    q <- dlaplace(x, 0, b_grid[i])
    p * log(p / q)
  }
  kls[[i]] <- integrate(integrand, lower = -20, upper = 20)$value
}

# Cross-entropy H(p||q) on the same grid.
for (i in seq_along(b_grid)) {
  integrand <- function(x) {
    p <- dnorm(x, 0, 1)
    q <- dlaplace(x, 0, b_grid[i])
    -p * log(q)
  }
  ces[[i]] <- integrate(integrand, lower = -20, upper = 20)$value
}

data <- data.frame(b = b_grid, KL = unlist(kls), CE = unlist(ces))

min_kl <- round(min(data$KL), 2)
min_ce <- round(min(data$CE), 2)
minimizer_kl <- round(data$b[which.min(data$KL)], 2)
minimizer_ce <- round(data$b[which.min(data$CE)], 2)

plot1 <- ggplot(data, aes(x = b)) +
  geom_line(aes(y = KL), color = "orange", linewidth = 1) +
  labs(title = "KL Divergence as a Function of b", x = "b", y = "KL Divergence") +
  scale_y_continuous(limits = c(0, 5)) +
  # annotate() draws each label once; geom_text(aes(...)) would overplot it per row
  annotate("text", x = 5, y = 3, label = paste("min D_KL(p||q) =", min_kl), color = "black", size = 3) +
  annotate("text", x = 5, y = 2, label = paste("Minimizer =", minimizer_kl), color = "black", size = 3)

plot2 <- ggplot(data, aes(x = b)) +
  geom_line(aes(y = CE), color = "darkgreen", linewidth = 1) +
  labs(title = "Cross-Entropy as a Function of b", x = "b", y = "Cross-Entropy") +
  scale_y_continuous(limits = c(0, 5)) +
  annotate("text", x = 5, y = 1.5, label = paste("min H(p||q) =", min_ce), color = "black", size = 3) +
  annotate("text", x = 5, y = 0.5, label = paste("Minimizer =", minimizer_ce), color = "black", size = 3)

plot <- grid.arrange(plot1, plot2, ncol = 2)

ggsave("../figure/kl_ce_comparison.png", plot = plot, width = 8, height = 3)
25 changes: 25 additions & 0 deletions slides/information-theory/slides-info-cross-entropy-kld.tex
@@ -61,6 +61,31 @@
\item Can now become negative, as $h(p)$ can be negative!
\end{itemize}
\end{vbframe}

\begin{vbframe} {Cross-Entropy Example}

Let $p(x)=N(0,1)$ and $q(x)=LP(0, 3)$. We can visualize the decomposition
$$
H(p \| q) = H(p) + D_{KL}(p \| q)
$$
\begin{center}
\includegraphics[width = 0.8\textwidth]{figure/cross_entropy_plot_1.png}
\end{center}

\end{vbframe}
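A one-line derivation could accompany the figure; the following is a sketch from the definitions above, not part of the committed slides:

\begin{align*}
H(p \| q) &= -\int p(x) \log q(x) \, dx \\
&= -\int p(x) \log p(x) \, dx + \int p(x) \log \frac{p(x)}{q(x)} \, dx \\
&= H(p) + D_{KL}(p \| q).
\end{align*}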

\begin{vbframe} {Cross-Entropy Example}

Let $p(x)=LP(0, 3)$ and $q(x)=N(0,1)$. We can visualize the decomposition
$$
H(p \| q) = H(p) + D_{KL}(p \| q)
$$

\begin{center}
\includegraphics[width = 0.8\textwidth]{figure/cross_entropy_plot_2.png}
\end{center}

\end{vbframe}

\begin{vbframe}{Proof: Maximum of Differential Entropy}
\textbf{Claim}: For a given variance, the continuous distribution that maximizes differential entropy is the Gaussian.
10 changes: 10 additions & 0 deletions slides/information-theory/slides-info-ml.tex
@@ -47,6 +47,16 @@
\end{itemize}
\end{vbframe}

\begin{vbframe}{KL vs Cross-Entropy Example}
Let $p(x)=N(0,1)$ and $q_{\thetab}(x)=LP(0, \thetab)$, where $\thetab$ is the Laplace scale parameter, and consider again
$$ \argmin_{\thetab} D_{KL}(p \| q_{\thetab}) = \argmin_{\thetab} -\E_{X \sim p} [\log q(X | \thetab)] = \argmin_{\thetab} H(p \| q_{\thetab}) $$

\begin{center}
\includegraphics[width=1\textwidth]{figure/kl_ce_comparison.png}
\end{center}

\end{vbframe}
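The equality chain holds because $H(p)$ does not depend on $\thetab$; a short justification, sketched here rather than taken from the commit:

\begin{align*}
D_{KL}(p \| q_{\thetab}) &= \E_{X \sim p}[\log p(X)] - \E_{X \sim p}[\log q(X | \thetab)] \\
&= -H(p) + H(p \| q_{\thetab}),
\end{align*}

so the two objectives differ only by the constant $-H(p)$ and share the same minimizer, which is what the figure confirms numerically.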

\begin{vbframe}{Cross-Entropy vs. Log-Loss}

\begin{itemize}
