Merge pull request RemyDegenne#131 from RemyDegenne/blueprint
Group nodes in the dependency graph
RemyDegenne authored Aug 29, 2024
2 parents 0962e2f + 145872c commit 89932cd
Showing 8 changed files with 125 additions and 30 deletions.
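Each grouping below follows the same pattern: a dummy lemma whose statement \uses the divergence's definitions and whose proof \uses every property lemma, so that downstream definitions can depend on a single summary node in the dependency graph. A minimal sketch of the pattern (the names div and lem:div_property_i are placeholders, not identifiers from this commit):

\begin{lemma}[Dummy lemma: div properties]
\label{lem:div_properties}
\leanok
\uses{def:div}
Dummy node to summarize properties of the divergence.
\end{lemma}

\begin{proof}\leanok
\uses{lem:div_property_1, lem:div_property_2}
\end{proof}

A later definition that builds on these properties then adds \uses{lem:div_properties}, so the graph shows one edge to the summary node instead of one edge per property lemma.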
1 change: 0 additions & 1 deletion blueprint/src/sections/f_divergence.tex
@@ -528,7 +528,6 @@ \subsection{Conditional f-divergence}
lem:fDiv_compProd_ne_top_iff,
lem:fDiv_compProd_left
}
TODO: continue listing lemmas here.
\end{proof}


26 changes: 25 additions & 1 deletion blueprint/src/sections/hellinger_alpha.tex
@@ -4,7 +4,7 @@ \section{Hellinger alpha-divergences}
\label{def:hellingerAlpha}
\lean{ProbabilityTheory.hellingerDiv}
\leanok
\uses{def:KL, def:fDiv, lem:fDiv_properties}
\uses{def:KL, def:fDiv, lem:fDiv_properties, lem:kl_properties}
Let $\mu, \nu$ be two measures on $\mathcal X$. The Hellinger divergence of order $\alpha \in [0,+\infty)$ between $\mu$ and $\nu$ is
\begin{align*}
\He_\alpha(\mu, \nu) = \left\{
@@ -121,6 +121,7 @@ \subsection{Properties inherited from f-divergences}
Apply Theorem~\ref{thm:fDiv_data_proc_2}.
\end{proof}


\begin{lemma}
\label{lem:hellingerAlpha_nonneg}
\lean{ProbabilityTheory.hellingerDiv_nonneg}
@@ -134,6 +135,7 @@ \subsection{Properties inherited from f-divergences}
Apply Lemma~\ref{lem:fDiv_nonneg}.
\end{proof}


\begin{lemma}
\label{lem:hellingerAlpha_convex}
%\lean{}
@@ -146,3 +148,25 @@ \subsection{Properties inherited from f-divergences}
\uses{thm:fDiv_convex}
Apply Theorem~\ref{thm:fDiv_convex}.
\end{proof}



TODO: find a way to hide the following dummy lemma
\begin{lemma}[Dummy lemma: hellingerAlpha properties]
\label{lem:hellingerAlpha_properties}
%\lean{}
\leanok
\uses{def:hellingerAlpha, def:condHellingerAlpha}
Dummy node to summarize properties of the Hellinger $\alpha$-divergence.
\end{lemma}

\begin{proof}\leanok
\uses{
lem:hellingerAlpha_ne_top_of_lt_one,
lem:hellingerAlpha_eq_integral,
lem:hellingerAlpha_symm,
thm:hellingerAlpha_data_proc,
lem:hellingerAlpha_nonneg,
lem:hellingerAlpha_convex
}
\end{proof}
2 changes: 1 addition & 1 deletion blueprint/src/sections/jensen_shannon.tex
@@ -4,7 +4,7 @@ \section{Jensen-Shannon divergence}
\label{def:jensenShannon}
%\lean{}
%\leanok
\uses{def:KL}
\uses{def:KL, lem:kl_properties}
The Jensen-Shannon divergence indexed by $\alpha \in (0,1)$ between two measures $\mu$ and $\nu$ is
\begin{align*}
\JS_\alpha(\mu, \nu) = \alpha \KL(\mu, \alpha \mu + (1 - \alpha)\nu) + (1 - \alpha) \KL(\nu, \alpha \mu + (1 - \alpha)\nu) \: .
51 changes: 51 additions & 0 deletions blueprint/src/sections/kl_divergence.tex
@@ -12,6 +12,7 @@ \section{Kullback-Leibler divergence}
if $\mu \ll \nu$ and $x \mapsto \log \frac{d \mu}{d \nu}(x)$ is $\mu$-integrable and $+\infty$ otherwise.
\end{definition}


\begin{lemma}
\label{lem:kl_eq_fDiv}
\lean{ProbabilityTheory.kl_eq_fDiv}
@@ -24,6 +25,7 @@ \section{Kullback-Leibler divergence}
Simple computation.
\end{proof}


\begin{definition}[Conditional Kullback-Leibler divergence]
\label{def:condKL}
\lean{ProbabilityTheory.condKL}
@@ -36,6 +38,7 @@ \section{Kullback-Leibler divergence}
if $x \mapsto \KL(\kappa(x), \eta(x))$ is $\mu$-integrable and $+\infty$ otherwise.
\end{definition}


\begin{lemma}
\label{lem:condKL_eq_condFDiv}
\lean{ProbabilityTheory.kl_eq_fDiv}
@@ -48,6 +51,8 @@ \section{Kullback-Leibler divergence}
Simple computation.
\end{proof}



\subsection{Properties inherited from f-divergences}

Since $\KL$ is an f-divergence, every inequality for f-divergences can be translated to $\KL$.
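A sketch of how such a translation goes, assuming $\KL(\mu, \nu) = D_f(\mu, \nu)$ with $f(x) = x \log x$ as in Lemma~\ref{lem:kl_eq_fDiv}: since $f$ is convex with $f(1) = 0$, the nonnegativity of f-divergences between probability measures (Lemma~\ref{lem:fDiv_nonneg}) specializes to
\begin{align*}
\KL(\mu, \nu) = D_f(\mu, \nu) \ge 0 \: ,
\end{align*}
which is Gibbs' inequality (Lemma~\ref{lem:kl_nonneg}) below.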
@@ -79,6 +84,7 @@ \subsection{Properties inherited from f-divergences}
KL is an f-divergence thanks to Lemma~\ref{lem:kl_eq_fDiv} and Lemma~\ref{lem:condKL_eq_condFDiv}, then apply Lemma~\ref{lem:condFDiv_const}.
\end{proof}


\begin{theorem}[Marginals]
\label{thm:kl_fst_le}
\lean{ProbabilityTheory.kl_fst_le, ProbabilityTheory.kl_snd_le}
@@ -94,6 +100,7 @@ \subsection{Properties inherited from f-divergences}
Apply Theorem~\ref{thm:fDiv_fst_le_2}.
\end{proof}


\begin{theorem}[Data-processing]
\label{thm:kl_data_proc}
\lean{ProbabilityTheory.kl_comp_right_le}
@@ -108,6 +115,7 @@ \subsection{Properties inherited from f-divergences}
Apply Theorem~\ref{thm:fDiv_data_proc_2}.
\end{proof}


\begin{lemma}
\label{lem:kl_ge}
\lean{ProbabilityTheory.kl_ge_mul_log}
@@ -120,6 +128,7 @@ \subsection{Properties inherited from f-divergences}
\uses{lem:le_fDiv, lem:kl_eq_fDiv}
\end{proof}


\begin{lemma}[Gibbs' inequality]
\label{lem:kl_nonneg}
\lean{ProbabilityTheory.kl_nonneg}
@@ -133,6 +142,7 @@ \subsection{Properties inherited from f-divergences}
Apply Lemma~\ref{lem:kl_ge} and use $\mu(\mathcal X) = \nu (\mathcal X)$.
\end{proof}


\begin{lemma}[Converse Gibbs' inequality]
\label{lem:kl_eq_zero_iff}
\lean{ProbabilityTheory.kl_eq_zero_iff'}
@@ -146,6 +156,7 @@ \subsection{Properties inherited from f-divergences}
KL is an f-divergence thanks to Lemma~\ref{lem:kl_eq_fDiv}, then apply Lemma~\ref{lem:fDiv_eq_zero_iff}.
\end{proof}


\begin{lemma}
\label{lem:kl_convex}
%\lean{}
@@ -159,6 +170,7 @@ \subsection{Properties inherited from f-divergences}
Apply Theorem~\ref{thm:fDiv_convex}.
\end{proof}


%TODO : this may be moved to another section about integrability
\begin{lemma}
\label{lem:integrable_llr_compProd_iff}
@@ -276,6 +288,7 @@ \subsection{Chain rule and tensorization}
Write $\mu = \mu_X \otimes \mu_{Y|X}$ and $\nu = \nu_X \otimes \nu_{Y|X}$, then use Theorem~\ref{thm:kl_compProd}.
\end{proof}
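For reference, a sketch of the identity established here, assuming Theorem~\ref{thm:kl_compProd} is the chain rule for composition products:
\begin{align*}
\KL(\mu, \nu)
= \KL(\mu_X \otimes \mu_{Y|X}, \nu_X \otimes \nu_{Y|X})
= \KL(\mu_X, \nu_X) + \KL(\mu_{Y|X}, \nu_{Y|X} \mid \mu_X) \: .
\end{align*}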


\begin{lemma}
\label{lem:kl_prod_two'}
\lean{ProbabilityTheory.kl_prod_two'}
@@ -290,6 +303,7 @@ \subsection{Chain rule and tensorization}
Write $\mu_1 \times \mu_2$ and $\nu_1 \times \nu_2$ as composition products of a measure and a constant kernel, then apply the chain rule (Theorem~\ref{thm:kl_compProd}) and Lemma~\ref{lem:condKL_const}.
\end{proof}


\begin{theorem}[Tensorization]
\label{thm:kl_prod_two}
\lean{ProbabilityTheory.kl_prod_two}
@@ -306,6 +320,7 @@ \subsection{Chain rule and tensorization}
This is a particular case of Lemma~\ref{lem:kl_prod_two'}.
\end{proof}


\begin{theorem}[Tensorization - finite product]
\label{thm:kl_pi}
\lean{ProbabilityTheory.kl_pi}
@@ -323,6 +338,7 @@ \subsection{Chain rule and tensorization}
Induction on the size of $I$, using Theorem~\ref{thm:kl_prod_two}.
\end{proof}


\begin{lemma}
\label{lem:kl_chain_rule_cond_event}
%\lean{}
@@ -353,6 +369,7 @@ \subsection{Chain rule and tensorization}

\end{proof}


\begin{lemma}
\label{lem:expectation_llr_event}
%\lean{}
@@ -366,3 +383,37 @@ \subsection{Chain rule and tensorization}
\uses{lem:kl_ge}
Apply Lemma~\ref{lem:kl_ge} to the measures $\mu$ and $\nu$ restricted to $E$. $\mu\left[\mathbb{I}(E)\log \frac{d \mu}{d \nu}\right]$ is the KL between those two distributions.
\end{proof}
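A sketch of the resulting bound, assuming Lemma~\ref{lem:kl_ge} states $\KL(\mu, \nu) \ge \mu(\mathcal X) \log \frac{\mu(\mathcal X)}{\nu(\mathcal X)}$ (cf.\ the Lean name kl_ge_mul_log): applied to the restrictions of $\mu$ and $\nu$ to $E$, whose KL is $\mu\left[\mathbb{I}(E)\log \frac{d \mu}{d \nu}\right]$, it gives
\begin{align*}
\mu\left[\mathbb{I}(E)\log \frac{d \mu}{d \nu}\right] \ge \mu(E) \log \frac{\mu(E)}{\nu(E)} \: .
\end{align*}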




TODO: find a way to hide the following dummy lemma
\begin{lemma}[Dummy lemma: KL properties]
\label{lem:kl_properties}
%\lean{}
\leanok
\uses{def:KL, def:condKL}
Dummy node to summarize properties of the Kullback-Leibler divergence.
\end{lemma}

\begin{proof}\leanok
\uses{
lem:kl_eq_fDiv,
lem:condKL_eq_condFDiv,
lem:kl_self,
lem:condKL_const,
thm:kl_fst_le,
thm:kl_data_proc,
lem:kl_ge,
lem:kl_nonneg,
lem:kl_eq_zero_iff,
lem:kl_convex,
thm:kl_compProd,
thm:kl_compProd_bayesInv,
thm:kl_fst_add_condKL,
lem:kl_prod_two',
thm:kl_prod_two,
thm:kl_pi,
lem:kl_chain_rule_cond_event
}
\end{proof}
8 changes: 4 additions & 4 deletions blueprint/src/sections/mutual_info.tex
@@ -5,7 +5,7 @@
\label{def:fMutualInfo}
%\lean{}
%\leanok
\uses{def:fDiv}
\uses{def:fDiv, lem:fDiv_properties}
The $f$-mutual information associated with an $f$-divergence is, for $\rho \in \mathcal M(\mathcal X \times \mathcal Y)$~,
\begin{align*}
I_f(\rho) = D_f(\rho, \rho_X \times \rho_Y) \: .
@@ -21,7 +21,7 @@
\label{def:mutualInfo}
%\lean{}
%\leanok
\uses{def:KL, def:fMutualInfo}
\uses{def:KL, def:fMutualInfo, lem:kl_properties}
The mutual information is, for $\rho \in \mathcal M(\mathcal X \times \mathcal Y)$~,
\begin{align*}
I(\rho) = \KL(\rho, \rho_X \times \rho_Y) \: .
@@ -34,7 +34,7 @@
\label{def:condFMutualInfo}
%\lean{}
%\leanok
\uses{def:condFDiv}
\uses{def:condFDiv, lem:fDiv_properties}
Let $\kappa : \mathcal Z \rightsquigarrow \mathcal X \times \mathcal Y$. The conditional $f$-mutual information of $\kappa$ with respect to $\nu \in \mathcal M(\mathcal Z)$ is
\begin{align*}
I_f(\kappa \mid \nu) = D_f(\kappa, \kappa_X \times \kappa_Y \mid \nu)
@@ -47,7 +47,7 @@
\label{def:condMutualInfo}
%\lean{}
%\leanok
\uses{def:condKL, def:condFMutualInfo}
\uses{def:condKL, def:condFMutualInfo, lem:kl_properties}
Let $\kappa : \mathcal Z \rightsquigarrow \mathcal X \times \mathcal Y$. The conditional mutual information of $\kappa$ with respect to $\nu \in \mathcal M(\mathcal Z)$ is
\begin{align*}
I(\kappa \mid \nu) = \KL(\kappa, \kappa_X \times \kappa_Y \mid \nu) \: .
23 changes: 21 additions & 2 deletions blueprint/src/sections/renyi_divergence.tex
@@ -4,7 +4,7 @@ \section{Rényi divergences}
\label{def:Renyi}
\lean{ProbabilityTheory.renyiDiv}
\leanok
\uses{def:KL, def:hellingerAlpha}
\uses{def:KL, def:hellingerAlpha, lem:hellingerAlpha_properties}
Let $\mu, \nu$ be two measures on $\mathcal X$. The Rényi divergence of order $\alpha \in \mathbb{R}$ between $\mu$ and $\nu$ is
\begin{align*}
R_\alpha(\mu, \nu) = \left\{
@@ -128,7 +128,7 @@ \section{Rényi divergences}
\label{def:condRenyi}
\lean{ProbabilityTheory.condRenyiDiv}
\leanok
\uses{def:condHellingerAlpha}
\uses{def:condHellingerAlpha, lem:hellingerAlpha_properties}
Let $\mu$ be a measure on $\mathcal X$ and $\kappa, \eta : \mathcal X \rightsquigarrow \mathcal Y$ be two kernels. The conditional Rényi divergence of order $\alpha \in (0,+\infty) \backslash \{1\}$ between $\kappa$ and $\eta$ conditionally to $\mu$ is
\begin{align*}
R_\alpha(\kappa, \eta \mid \mu) =\frac{1}{\alpha - 1} \log (1 + (\alpha - 1) \He_\alpha(\kappa, \eta \mid \mu)) \: .
@@ -137,6 +137,9 @@ \section{Rényi divergences}

TODO: the reason for this definition is that it is equal to $R_\alpha(\mu \otimes \kappa, \mu \otimes \eta)$, which is the generic conditional divergence definition, and also that this is the definition for which we have a chain rule.
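A sketch of the first claim, assuming the conditional Hellinger divergence satisfies $\He_\alpha(\mu \otimes \kappa, \mu \otimes \eta) = \He_\alpha(\kappa, \eta \mid \mu)$ (the analogue of the composition-product lemmas for f-divergences) and that the unconditional $R_\alpha$ is given by the same formula in terms of $\He_\alpha$:
\begin{align*}
R_\alpha(\mu \otimes \kappa, \mu \otimes \eta)
&= \frac{1}{\alpha - 1} \log (1 + (\alpha - 1) \He_\alpha(\mu \otimes \kappa, \mu \otimes \eta)) \\
&= \frac{1}{\alpha - 1} \log (1 + (\alpha - 1) \He_\alpha(\kappa, \eta \mid \mu))
= R_\alpha(\kappa, \eta \mid \mu) \: .
\end{align*}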




\subsection{Properties inherited from f-divergences}

Since Rényi divergences are monotone transformations of f-divergences, every inequality for f-divergences can be translated to Rényi divergences.
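For instance, a sketch of how the data-processing inequality transfers, assuming $R_\alpha(\mu, \nu) = \frac{1}{\alpha - 1} \log (1 + (\alpha - 1) \He_\alpha(\mu, \nu))$ as in the conditional definition above: the map $x \mapsto \frac{1}{\alpha - 1} \log (1 + (\alpha - 1) x)$ is nondecreasing where defined (its derivative is $(1 + (\alpha - 1) x)^{-1} > 0$), so any inequality $\He_\alpha(\mu, \nu) \ge \He_\alpha(\mu', \nu')$, in particular data-processing, yields $R_\alpha(\mu, \nu) \ge R_\alpha(\mu', \nu')$.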
@@ -169,6 +172,9 @@ \subsection{Properties inherited from f-divergences}
By Corollary~\ref{cor:data_proc_event}, $D_f(\mu, \nu) \ge D_f(\mu_E, \nu_E)$, hence $R_\alpha(\mu, \nu) \ge R_\alpha(\mu_E, \nu_E)$.
\end{proof}




\subsection{Comparisons between orders}

\begin{lemma}
@@ -226,6 +232,9 @@ \subsection{Comparisons between orders}
\begin{proof}
\end{proof}




\subsection{The tilted measure}

\begin{definition}
@@ -239,6 +248,7 @@ \subsection{The tilted measure}
\end{align*}
\end{definition}


\begin{lemma}
\label{lem:renyiMeasure_ac}
%\lean{}
@@ -252,6 +262,7 @@ \subsection{The tilted measure}

\end{proof}


\begin{lemma}
\label{lem:rnDeriv_renyiMeasure}
%\lean{}
@@ -267,6 +278,7 @@ \subsection{The tilted measure}

\end{proof}


\begin{lemma}
\label{lem:kl_renyiMeasure_eq}
%\lean{}
@@ -283,6 +295,7 @@ \subsection{The tilted measure}
Unfold definitions and compute?
\end{proof}


\begin{corollary}
\label{cor:renyi_eq_add_kl}
%\lean{}
@@ -299,6 +312,7 @@ \subsection{The tilted measure}
Apply Lemma~\ref{lem:kl_renyiMeasure_eq} to $\xi = \mu^{(\alpha, \nu)}$.
\end{proof}


\begin{lemma}
\label{lem:renyi_eq_inf_add_kl}
%\lean{}
@@ -348,6 +362,8 @@ \subsection{The tilted measure}
The result is a rephrasing of Lemma~\ref{lem:renyi_eq_inf_add_kl}.
\end{proof}



\subsection{Chain rule and tensorization}

\begin{theorem}[Chain rule]
@@ -398,6 +414,7 @@ \subsection{Chain rule and tensorization}
Apply Theorem~\ref{thm:renyi_chain_rule} (the product of measures is a special case of $\otimes$). TODO: more details.
\end{proof}


\begin{theorem}[Tensorization - finite product]
\label{thm:renyi_prod}
%\lean{}
@@ -412,6 +429,7 @@ \subsection{Chain rule and tensorization}
Induction over the finite index set, using Corollary~\ref{cor:renyi_prod_two}.
\end{proof}


\begin{corollary}
\label{lem:renyi_prod_n}
%\lean{}
@@ -426,6 +444,7 @@ \subsection{Chain rule and tensorization}
Apply Theorem~\ref{thm:renyi_prod}.
\end{proof}


\begin{theorem}[Tensorization - countable product]
\label{thm:renyi_prod_countable}
%\lean{}