Merge pull request RemyDegenne#131 from RemyDegenne/blueprint
Group nodes in the dependency graph
RemyDegenne authored Aug 29, 2024
2 parents 0962e2f + 145872c commit 89932cd
Showing 8 changed files with 125 additions and 30 deletions.
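Each grouping below follows the same pattern: a dummy lemma whose statement \uses the divergence's definitions and whose proof \uses every property lemma, so that downstream definitions can depend on a single summary node in the dependency graph. A minimal sketch of the pattern (the names div and lem:div_property_i are placeholders, not identifiers from this commit):

\begin{lemma}[Dummy lemma: div properties]
\label{lem:div_properties}
\leanok
\uses{def:div}
Dummy node to summarize properties of the divergence.
\end{lemma}

\begin{proof}\leanok
\uses{lem:div_property_1, lem:div_property_2}
\end{proof}

A later definition that builds on these properties then adds \uses{lem:div_properties}, so the graph shows one edge to the summary node instead of one edge per property lemma.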
1 change: 0 additions & 1 deletion blueprint/src/sections/f_divergence.tex
@@ -528,7 +528,6 @@ \subsection{Conditional f-divergence}
lem:fDiv_compProd_ne_top_iff,
lem:fDiv_compProd_left
}
TODO: continue listing lemmas here.
\end{proof}


26 changes: 25 additions & 1 deletion blueprint/src/sections/hellinger_alpha.tex
@@ -4,7 +4,7 @@ \section{Hellinger alpha-divergences}
\label{def:hellingerAlpha}
\lean{ProbabilityTheory.hellingerDiv}
\leanok
\uses{def:KL, def:fDiv, lem:fDiv_properties}
\uses{def:KL, def:fDiv, lem:fDiv_properties, lem:kl_properties}
Let $\mu, \nu$ be two measures on $\mathcal X$. The Hellinger divergence of order $\alpha \in [0,+\infty)$ between $\mu$ and $\nu$ is
\begin{align*}
\He_\alpha(\mu, \nu) = \left\{
@@ -121,6 +121,7 @@ \subsection{Properties inherited from f-divergences}
Apply Theorem~\ref{thm:fDiv_data_proc_2}.
\end{proof}


\begin{lemma}
\label{lem:hellingerAlpha_nonneg}
\lean{ProbabilityTheory.hellingerDiv_nonneg}
@@ -134,6 +135,7 @@ \subsection{Properties inherited from f-divergences}
Apply Lemma~\ref{lem:fDiv_nonneg}.
\end{proof}


\begin{lemma}
\label{lem:hellingerAlpha_convex}
%\lean{}
@@ -146,3 +148,25 @@ \subsection{Properties inherited from f-divergences}
\uses{thm:fDiv_convex}
Apply Theorem~\ref{thm:fDiv_convex}.
\end{proof}



TODO: find a way to hide the following dummy lemma
\begin{lemma}[Dummy lemma: hellingerAlpha properties]
\label{lem:hellingerAlpha_properties}
%\lean{}
\leanok
\uses{def:hellingerAlpha, def:condHellingerAlpha}
Dummy node to summarize properties of the Hellinger $\alpha$-divergence.
\end{lemma}

\begin{proof}\leanok
\uses{
lem:hellingerAlpha_ne_top_of_lt_one,
lem:hellingerAlpha_eq_integral,
lem:hellingerAlpha_symm,
thm:hellingerAlpha_data_proc,
lem:hellingerAlpha_nonneg,
lem:hellingerAlpha_convex
}
\end{proof}
2 changes: 1 addition & 1 deletion blueprint/src/sections/jensen_shannon.tex
@@ -4,7 +4,7 @@ \section{Jensen-Shannon divergence}
\label{def:jensenShannon}
%\lean{}
%\leanok
\uses{def:KL}
\uses{def:KL, lem:kl_properties}
The Jensen-Shannon divergence indexed by $\alpha \in (0,1)$ between two measures $\mu$ and $\nu$ is
\begin{align*}
\JS_\alpha(\mu, \nu) = \alpha \KL(\mu, \alpha \mu + (1 - \alpha)\nu) + (1 - \alpha) \KL(\nu, \alpha \mu + (1 - \alpha)\nu) \: .
51 changes: 51 additions & 0 deletions blueprint/src/sections/kl_divergence.tex
@@ -12,6 +12,7 @@ \section{Kullback-Leibler divergence}
if $\mu \ll \nu$ and $x \mapsto \log \frac{d \mu}{d \nu}(x)$ is $\mu$-integrable and $+\infty$ otherwise.
\end{definition}


\begin{lemma}
\label{lem:kl_eq_fDiv}
\lean{ProbabilityTheory.kl_eq_fDiv}
@@ -24,6 +25,7 @@ \section{Kullback-Leibler divergence}
Simple computation.
\end{proof}


\begin{definition}[Conditional Kullback-Leibler divergence]
\label{def:condKL}
\lean{ProbabilityTheory.condKL}
@@ -36,6 +38,7 @@ \section{Kullback-Leibler divergence}
if $x \mapsto \KL(\kappa(x), \eta(x))$ is $\mu$-integrable and $+\infty$ otherwise.
\end{definition}


\begin{lemma}
\label{lem:condKL_eq_condFDiv}
\lean{ProbabilityTheory.kl_eq_fDiv}
@@ -48,6 +51,8 @@ \section{Kullback-Leibler divergence}
Simple computation.
\end{proof}



\subsection{Properties inherited from f-divergences}

Since $\KL$ is an f-divergence, every inequality for f-divergences can be translated to $\KL$.
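A sketch of how such a translation goes, assuming $\KL(\mu, \nu) = D_f(\mu, \nu)$ with $f(x) = x \log x$ as in Lemma~\ref{lem:kl_eq_fDiv}: since $f$ is convex with $f(1) = 0$, the nonnegativity of f-divergences between probability measures (Lemma~\ref{lem:fDiv_nonneg}) specializes to
\begin{align*}
\KL(\mu, \nu) = D_f(\mu, \nu) \ge 0 \: ,
\end{align*}
which is Gibbs' inequality (Lemma~\ref{lem:kl_nonneg}) below.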
@@ -79,6 +84,7 @@ \subsection{Properties inherited from f-divergences}
KL is an f-divergence thanks to Lemma~\ref{lem:kl_eq_fDiv} and Lemma~\ref{lem:condKL_eq_condFDiv}, then apply Lemma~\ref{lem:condFDiv_const}.
\end{proof}


\begin{theorem}[Marginals]
\label{thm:kl_fst_le}
\lean{ProbabilityTheory.kl_fst_le, ProbabilityTheory.kl_snd_le}
@@ -94,6 +100,7 @@ \subsection{Properties inherited from f-divergences}
Apply Theorem~\ref{thm:fDiv_fst_le_2}.
\end{proof}


\begin{theorem}[Data-processing]
\label{thm:kl_data_proc}
\lean{ProbabilityTheory.kl_comp_right_le}
@@ -108,6 +115,7 @@ \subsection{Properties inherited from f-divergences}
Apply Theorem~\ref{thm:fDiv_data_proc_2}.
\end{proof}


\begin{lemma}
\label{lem:kl_ge}
\lean{ProbabilityTheory.kl_ge_mul_log}
@@ -120,6 +128,7 @@ \subsection{Properties inherited from f-divergences}
\uses{lem:le_fDiv, lem:kl_eq_fDiv}
\end{proof}


\begin{lemma}[Gibbs' inequality]
\label{lem:kl_nonneg}
\lean{ProbabilityTheory.kl_nonneg}
@@ -133,6 +142,7 @@ \subsection{Properties inherited from f-divergences}
Apply Lemma~\ref{lem:kl_ge} and use $\mu(\mathcal X) = \nu (\mathcal X)$.
\end{proof}


\begin{lemma}[Converse Gibbs' inequality]
\label{lem:kl_eq_zero_iff}
\lean{ProbabilityTheory.kl_eq_zero_iff'}
@@ -146,6 +156,7 @@ \subsection{Properties inherited from f-divergences}
KL is an f-divergence thanks to Lemma~\ref{lem:kl_eq_fDiv}, then apply Lemma~\ref{lem:fDiv_eq_zero_iff}.
\end{proof}


\begin{lemma}
\label{lem:kl_convex}
%\lean{}
@@ -159,6 +170,7 @@ \subsection{Properties inherited from f-divergences}
Apply Theorem~\ref{thm:fDiv_convex}.
\end{proof}


%TODO : this may be moved to another section about integrability
\begin{lemma}
\label{lem:integrable_llr_compProd_iff}
@@ -276,6 +288,7 @@ \subsection{Chain rule and tensorization}
Write $\mu = \mu_X \otimes \mu_{Y|X}$ and $\nu = \nu_X \otimes \nu_{Y|X}$, then use Theorem~\ref{thm:kl_compProd}.
\end{proof}
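For reference, a sketch of the identity established here, assuming Theorem~\ref{thm:kl_compProd} is the chain rule for composition products:
\begin{align*}
\KL(\mu, \nu)
= \KL(\mu_X \otimes \mu_{Y|X}, \nu_X \otimes \nu_{Y|X})
= \KL(\mu_X, \nu_X) + \KL(\mu_{Y|X}, \nu_{Y|X} \mid \mu_X) \: .
\end{align*}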


\begin{lemma}
\label{lem:kl_prod_two'}
\lean{ProbabilityTheory.kl_prod_two'}
@@ -290,6 +303,7 @@ \subsection{Chain rule and tensorization}
Write $\mu_1 \times \mu_2$ and $\nu_1 \times \nu_2$ as composition products of a measure and a constant kernel, then apply the chain rule (Theorem~\ref{thm:kl_compProd}) and Lemma~\ref{lem:condKL_const}.
\end{proof}


\begin{theorem}[Tensorization]
\label{thm:kl_prod_two}
\lean{ProbabilityTheory.kl_prod_two}
@@ -306,6 +320,7 @@ \subsection{Chain rule and tensorization}
This is a particular case of Lemma~\ref{lem:kl_prod_two'}.
\end{proof}


\begin{theorem}[Tensorization - finite product]
\label{thm:kl_pi}
\lean{ProbabilityTheory.kl_pi}
@@ -323,6 +338,7 @@ \subsection{Chain rule and tensorization}
Induction on the size of $I$, using Theorem~\ref{thm:kl_prod_two}.
\end{proof}


\begin{lemma}
\label{lem:kl_chain_rule_cond_event}
%\lean{}
@@ -353,6 +369,7 @@ \subsection{Chain rule and tensorization}

\end{proof}


\begin{lemma}
\label{lem:expectation_llr_event}
%\lean{}
@@ -366,3 +383,37 @@ \subsection{Chain rule and tensorization}
\uses{lem:kl_ge}
Apply Lemma~\ref{lem:kl_ge} to the measures $\mu$ and $\nu$ restricted to $E$. $\mu\left[\mathbb{I}(E)\log \frac{d \mu}{d \nu}\right]$ is the KL between those two distributions.
\end{proof}
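A sketch of the resulting bound, assuming Lemma~\ref{lem:kl_ge} states $\KL(\mu, \nu) \ge \mu(\mathcal X) \log \frac{\mu(\mathcal X)}{\nu(\mathcal X)}$ (cf.\ the Lean name kl_ge_mul_log): applied to the restrictions of $\mu$ and $\nu$ to $E$, whose KL is $\mu\left[\mathbb{I}(E)\log \frac{d \mu}{d \nu}\right]$, it gives
\begin{align*}
\mu\left[\mathbb{I}(E)\log \frac{d \mu}{d \nu}\right] \ge \mu(E) \log \frac{\mu(E)}{\nu(E)} \: .
\end{align*}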




TODO: find a way to hide the following dummy lemma
\begin{lemma}[Dummy lemma: KL properties]
\label{lem:kl_properties}
%\lean{}
\leanok
\uses{def:KL, def:condKL}
Dummy node to summarize properties of the Kullback-Leibler divergence.
\end{lemma}

\begin{proof}\leanok
\uses{
lem:kl_eq_fDiv,
lem:condKL_eq_condFDiv,
lem:kl_self,
lem:condKL_const,
thm:kl_fst_le,
thm:kl_data_proc,
lem:kl_ge,
lem:kl_nonneg,
lem:kl_eq_zero_iff,
lem:kl_convex,
thm:kl_compProd,
thm:kl_compProd_bayesInv,
thm:kl_fst_add_condKL,
lem:kl_prod_two',
thm:kl_prod_two,
thm:kl_pi,
lem:kl_chain_rule_cond_event
}
\end{proof}
8 changes: 4 additions & 4 deletions blueprint/src/sections/mutual_info.tex
@@ -5,7 +5,7 @@
\label{def:fMutualInfo}
%\lean{}
%\leanok
\uses{def:fDiv}
\uses{def:fDiv, lem:fDiv_properties}
The $f$-mutual information associated with an $f$-divergence is, for $\rho \in \mathcal M(\mathcal X \times \mathcal Y)$~,
\begin{align*}
I_f(\rho) = D_f(\rho, \rho_X \times \rho_Y) \: .
@@ -21,7 +21,7 @@
\label{def:mutualInfo}
%\lean{}
%\leanok
\uses{def:KL, def:fMutualInfo}
\uses{def:KL, def:fMutualInfo, lem:kl_properties}
The mutual information is, for $\rho \in \mathcal M(\mathcal X \times \mathcal Y)$~,
\begin{align*}
I(\rho) = \KL(\rho, \rho_X \times \rho_Y) \: .
@@ -34,7 +34,7 @@
\label{def:condFMutualInfo}
%\lean{}
%\leanok
\uses{def:condFDiv}
\uses{def:condFDiv, lem:fDiv_properties}
Let $\kappa : \mathcal Z \rightsquigarrow \mathcal X \times \mathcal Y$. The conditional $f$-mutual information of $\kappa$ with respect to $\nu \in \mathcal M(\mathcal Z)$ is
\begin{align*}
I_f(\kappa \mid \nu) = D_f(\kappa, \kappa_X \times \kappa_Y \mid \nu)
@@ -47,7 +47,7 @@
\label{def:condMutualInfo}
%\lean{}
%\leanok
\uses{def:condKL, def:condFMutualInfo}
\uses{def:condKL, def:condFMutualInfo, lem:kl_properties}
Let $\kappa : \mathcal Z \rightsquigarrow \mathcal X \times \mathcal Y$. The conditional mutual information of $\kappa$ with respect to $\nu \in \mathcal M(\mathcal Z)$ is
\begin{align*}
I(\kappa \mid \nu) = \KL(\kappa, \kappa_X \times \kappa_Y \mid \nu) \: .
23 changes: 21 additions & 2 deletions blueprint/src/sections/renyi_divergence.tex
@@ -4,7 +4,7 @@ \section{Rényi divergences}
\label{def:Renyi}
\lean{ProbabilityTheory.renyiDiv}
\leanok
\uses{def:KL, def:hellingerAlpha}
\uses{def:KL, def:hellingerAlpha, lem:hellingerAlpha_properties}
Let $\mu, \nu$ be two measures on $\mathcal X$. The Rényi divergence of order $\alpha \in \mathbb{R}$ between $\mu$ and $\nu$ is
\begin{align*}
R_\alpha(\mu, \nu) = \left\{
@@ -128,7 +128,7 @@ \section{Rényi divergences}
\label{def:condRenyi}
\lean{ProbabilityTheory.condRenyiDiv}
\leanok
\uses{def:condHellingerAlpha}
\uses{def:condHellingerAlpha, lem:hellingerAlpha_properties}
Let $\mu$ be a measure on $\mathcal X$ and $\kappa, \eta : \mathcal X \rightsquigarrow \mathcal Y$ be two kernels. The conditional Rényi divergence of order $\alpha \in (0,+\infty) \backslash \{1\}$ between $\kappa$ and $\eta$ conditionally to $\mu$ is
\begin{align*}
R_\alpha(\kappa, \eta \mid \mu) =\frac{1}{\alpha - 1} \log (1 + (\alpha - 1) \He_\alpha(\kappa, \eta \mid \mu)) \: .
@@ -137,6 +137,9 @@ \section{Rényi divergences}

TODO: the reason for this definition is that it is equal to $R_\alpha(\mu \otimes \kappa, \mu \otimes \eta)$, which is the generic conditional divergence definition, and also that this is the definition for which we have a chain rule.
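A sketch of the first claim, assuming the conditional Hellinger divergence satisfies $\He_\alpha(\mu \otimes \kappa, \mu \otimes \eta) = \He_\alpha(\kappa, \eta \mid \mu)$ (the analogue of the composition-product lemmas for f-divergences) and that the unconditional $R_\alpha$ is given by the same formula in terms of $\He_\alpha$:
\begin{align*}
R_\alpha(\mu \otimes \kappa, \mu \otimes \eta)
&= \frac{1}{\alpha - 1} \log (1 + (\alpha - 1) \He_\alpha(\mu \otimes \kappa, \mu \otimes \eta)) \\
&= \frac{1}{\alpha - 1} \log (1 + (\alpha - 1) \He_\alpha(\kappa, \eta \mid \mu))
= R_\alpha(\kappa, \eta \mid \mu) \: .
\end{align*}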




\subsection{Properties inherited from f-divergences}

Since Rényi divergences are monotone transformations of f-divergences, every inequality for f-divergences can be translated to Rényi divergences.
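For instance, a sketch of how the data-processing inequality transfers, assuming $R_\alpha(\mu, \nu) = \frac{1}{\alpha - 1} \log (1 + (\alpha - 1) \He_\alpha(\mu, \nu))$ as in the conditional definition above: the map $x \mapsto \frac{1}{\alpha - 1} \log (1 + (\alpha - 1) x)$ is nondecreasing where defined (its derivative is $(1 + (\alpha - 1) x)^{-1} > 0$), so any inequality $\He_\alpha(\mu, \nu) \ge \He_\alpha(\mu', \nu')$, in particular data-processing, yields $R_\alpha(\mu, \nu) \ge R_\alpha(\mu', \nu')$.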
@@ -169,6 +172,9 @@ \subsection{Properties inherited from f-divergences}
By Corollary~\ref{cor:data_proc_event}, $D_f(\mu, \nu) \ge D_f(\mu_E, \nu_E)$, hence $R_\alpha(\mu, \nu) \ge R_\alpha(\mu_E, \nu_E)$.
\end{proof}




\subsection{Comparisons between orders}

\begin{lemma}
@@ -226,6 +232,9 @@ \subsection{Comparisons between orders}
\begin{proof}
\end{proof}




\subsection{The tilted measure}

\begin{definition}
@@ -239,6 +248,7 @@ \subsection{The tilted measure}
\end{align*}
\end{definition}


\begin{lemma}
\label{lem:renyiMeasure_ac}
%\lean{}
@@ -252,6 +262,7 @@ \subsection{The tilted measure}

\end{proof}


\begin{lemma}
\label{lem:rnDeriv_renyiMeasure}
%\lean{}
@@ -267,6 +278,7 @@ \subsection{The tilted measure}

\end{proof}


\begin{lemma}
\label{lem:kl_renyiMeasure_eq}
%\lean{}
@@ -283,6 +295,7 @@ \subsection{The tilted measure}
Unfold definitions and compute?
\end{proof}


\begin{corollary}
\label{cor:renyi_eq_add_kl}
%\lean{}
@@ -299,6 +312,7 @@ \subsection{The tilted measure}
Apply Lemma~\ref{lem:kl_renyiMeasure_eq} to $\xi = \mu^{(\alpha, \nu)}$.
\end{proof}


\begin{lemma}
\label{lem:renyi_eq_inf_add_kl}
%\lean{}
@@ -348,6 +362,8 @@ \subsection{The tilted measure}
The result is a rephrasing of Lemma~\ref{lem:renyi_eq_inf_add_kl}.
\end{proof}



\subsection{Chain rule and tensorization}

\begin{theorem}[Chain rule]
@@ -398,6 +414,7 @@ \subsection{Chain rule and tensorization}
Apply Theorem~\ref{thm:renyi_chain_rule} (the product of measures is a special case of $\otimes$). TODO: more details.
\end{proof}


\begin{theorem}[Tensorization - finite product]
\label{thm:renyi_prod}
%\lean{}
@@ -412,6 +429,7 @@ \subsection{Chain rule and tensorization}
Induction over the finite index set, using Corollary~\ref{cor:renyi_prod_two}.
\end{proof}


\begin{corollary}
\label{lem:renyi_prod_n}
%\lean{}
@@ -426,6 +444,7 @@ \subsection{Chain rule and tensorization}
Apply Theorem~\ref{thm:renyi_prod}.
\end{proof}


\begin{theorem}[Tensorization - countable product]
\label{thm:renyi_prod_countable}
%\lean{}