Merge remote-tracking branch 'upstream/master' into TestLeanCopilot
LorenzoLuccioli committed Aug 29, 2024
2 parents aa32fcb + 7dbb831 commit 8a50cba
Showing 15 changed files with 950 additions and 334 deletions.
2 changes: 1 addition & 1 deletion blueprint/src/content.tex
@@ -48,4 +48,4 @@ \chapter{The risk of simple binary hypothesis testing}
\input{sections/kernels}
\input{sections/rn_deriv}
\input{sections/convex}
-\input{sections/deficiency}
+%\input{sections/deficiency}
1 change: 1 addition & 0 deletions blueprint/src/macros/print.tex
@@ -13,6 +13,7 @@
% Dummy macros that make sense only for web version.
\newcommand{\lean}[1]{}
\newcommand{\leanok}{}
+\newcommand{\mathlibok}{}
% Make sure that arguments of \uses and \proves are real labels, by using invisible refs:
% latex prints a warning if the label is not defined, but nothing is shown in the pdf file.
% It uses LaTeX3 programming; this is why we use the expl3 package.
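% For illustration, a minimal sketch of such an invisible-ref macro in expl3
% (an assumption about the technique, not this blueprint's actual code; the
% name \usesSketch is hypothetical): each argument is typeset into a box that
% is never used, so an undefined label still triggers the usual LaTeX warning
% while nothing appears in the output.
\ExplSyntaxOn
\NewDocumentCommand{\usesSketch}{m}
  { \clist_map_inline:nn {#1} { \sbox0{ \ref{##1} } } }
\ExplSyntaxOff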
5 changes: 4 additions & 1 deletion blueprint/src/sections/chernoff.tex
@@ -4,7 +4,7 @@ \section{Chernoff divergence}
\label{def:Chernoff}
\lean{ProbabilityTheory.chernoffDiv}
\leanok
-\uses{def:Renyi}
+\uses{def:Renyi, lem:renyi_properties}
The Chernoff divergence of order $\alpha > 0$ between two measures $\mu$ and $\nu$ on $\mathcal X$ is
\begin{align*}
C_\alpha(\mu, \nu) = \inf_{\xi \in \mathcal P(\mathcal X)}\max\{R_\alpha(\xi, \mu), R_\alpha(\xi, \nu)\} \: .
@@ -28,6 +28,7 @@ \section{Chernoff divergence}
This is $R_1 = \KL$ (by definition).
\end{proof}
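As a quick sanity check of Definition~\ref{def:Chernoff} (our own example, using only the standard facts that $R_\alpha \ge 0$ on probability measures and that $R_\alpha(\mu, \mu) = 0$): for a probability measure $\mu = \nu$, the choice $\xi = \mu$ gives
\begin{align*}
C_\alpha(\mu, \mu) \le \max\{R_\alpha(\mu, \mu), R_\alpha(\mu, \mu)\} = 0 \: ,
\end{align*}
and since the quantity under the infimum is nonnegative for every $\xi$, in fact $C_\alpha(\mu, \mu) = 0$.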


\begin{lemma}[Symmetry]
\label{lem:chernoff_symm}
%\lean{}
@@ -40,6 +41,7 @@ \section{Chernoff divergence}
Immediate from the definition.
\end{proof}


\begin{lemma}[Monotonicity]
\label{lem:chernoff_mono}
%\lean{}
@@ -53,6 +55,7 @@ \section{Chernoff divergence}
Consequence of Lemma~\ref{lem:renyi_monotone}.
\end{proof}


\begin{lemma}
\label{lem:chernoff_eq_max_renyi}
%\lean{}
118 changes: 91 additions & 27 deletions blueprint/src/sections/f_divergence.tex
@@ -19,6 +19,7 @@ \subsection{Definition and basic properties}
if $x \mapsto f\left(\frac{d \mu}{d \nu}(x)\right)$ is $\nu$-integrable and $+\infty$ otherwise.
\end{definition}
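For intuition, two standard examples (stated for probability measures with $\mu \ll \nu$): the choice $f : x \mapsto x \log x$ recovers the Kullback-Leibler divergence, and $f : x \mapsto \frac{1}{2} \vert x - 1 \vert$ recovers the total variation distance (cf.\ Lemma~\ref{lem:tv_eq_fDiv}).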


\begin{lemma}
\label{lem:fDiv_ne_top_iff}
\lean{ProbabilityTheory.fDiv_ne_top_iff}
@@ -46,6 +47,7 @@ \subsection{Definition and basic properties}
Compute the integral.
\end{proof}


\begin{lemma}
\label{lem:fDiv_self}
\lean{ProbabilityTheory.fDiv_self}
Expand Down Expand Up @@ -87,6 +89,7 @@ \subsection{Definition and basic properties}
$D_{x\mapsto x}(\mu, \nu) = (\frac{d\mu}{d\nu}\cdot \nu)(\mathcal X) + \mu_{\perp \nu}(\mathcal X) = \mu (\mathcal X)$, where the last equality is the Lebesgue decomposition $\mu = \frac{d\mu}{d\nu}\cdot \nu + \mu_{\perp \nu}$.
\end{proof}


\begin{lemma}
\label{lem:fDiv_mul}
\lean{ProbabilityTheory.fDiv_mul}
@@ -99,6 +102,7 @@ \subsection{Definition and basic properties}
Linearity of the integral.
\end{proof}


\begin{lemma}
\label{lem:fDiv_add}
\lean{ProbabilityTheory.fDiv_add}
@@ -111,6 +115,7 @@ \subsection{Definition and basic properties}
Linearity of the integral.
\end{proof}


\begin{lemma}
\label{lem:fDiv_add_linear}
\lean{ProbabilityTheory.fDiv_add_linear}
@@ -124,6 +129,7 @@ \subsection{Definition and basic properties}
Linearity (Lemmas~\ref{lem:fDiv_add} and~\ref{lem:fDiv_mul}), then Lemmas~\ref{lem:fDiv_const} and~\ref{lem:fDiv_id}.
\end{proof}


\begin{lemma}
\label{lem:fDiv_map_measurableEmbedding}
\lean{ProbabilityTheory.fDiv_map_measurableEmbedding}
@@ -223,6 +229,7 @@ \subsection{Definition and basic properties}
$\frac{d(\mu_1 + \mu_2)}{d \nu} = \frac{d \mu_1}{d \nu}$ a.e. and $(\mu_1 + \mu_2)_{\perp \nu} = \mu_2$; both equalities follow from the uniqueness of the Lebesgue decomposition, since $\mu_1 \ll \nu$ and $\mu_2 \perp \nu$.
\end{proof}


\begin{lemma}[Superseded by Lemma~\ref{lem:fDiv_add_measure_le}]
\label{lem:fDiv_add_measure_le_of_ac}
\lean{ProbabilityTheory.fDiv_add_measure_le_of_ac}
@@ -244,6 +251,7 @@ \subsection{Definition and basic properties}
\end{align*}
\end{proof}


\begin{lemma}
\label{lem:fDiv_add_measure_le}
\lean{ProbabilityTheory.fDiv_add_measure_le}
@@ -267,6 +275,7 @@ \subsection{Definition and basic properties}
\end{align*}
\end{proof}


\begin{lemma}
\label{lem:fDiv_eq_add_withDensity_derivAtTop}
\lean{ProbabilityTheory.fDiv_eq_add_withDensity_derivAtTop}
@@ -281,6 +290,7 @@ \subsection{Definition and basic properties}
Apply Lemma~\ref{lem:fDiv_absolutelyContinuous_add_mutuallySingular} to $\frac{d\mu}{d\nu}\cdot \nu$ and $\mu_{\perp \nu}$: by the Lebesgue decomposition these two measures sum to $\mu$, the first is absolutely continuous with respect to $\nu$ and the second is mutually singular with $\nu$.
\end{proof}


\begin{lemma}[Superseded by Lemma~\ref{lem:le_fDiv}]
\label{lem:le_fDiv_of_ac}
\lean{ProbabilityTheory.le_fDiv_of_ac}
@@ -301,6 +311,7 @@ \subsection{Definition and basic properties}

\end{proof}


\begin{lemma}
\label{lem:le_fDiv}
\lean{ProbabilityTheory.le_fDiv}
@@ -323,6 +334,7 @@ \subsection{Definition and basic properties}
\end{align*}
\end{proof}


\begin{lemma}
\label{lem:fDiv_nonneg}
\lean{ProbabilityTheory.fDiv_nonneg}
@@ -337,6 +349,7 @@ \subsection{Definition and basic properties}
\end{proof}



\subsection{Conditional f-divergence}

TODO: replace the definition by $D(\mu \otimes \kappa, \mu \otimes \eta)$, which allows for a more general proof of the ``conditioning increases divergence'' inequality.
@@ -353,6 +366,7 @@ \subsection{Conditional f-divergence}
if $x \mapsto D_f(\kappa(x), \eta(x))$ is $\mu$-integrable and $+\infty$ otherwise.
\end{definition}
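To make the TODO above concrete (our paraphrase; the notation in the elided lines may differ): the current definition is the average
\begin{align*}
\int_{\mathcal X} D_f(\kappa(x), \eta(x)) \, d\mu(x) \: ,
\end{align*}
finite exactly when the integrand is $\mu$-integrable, while the proposed alternative $D_f(\mu \otimes \kappa, \mu \otimes \eta)$ compares the two composition-product measures directly; under suitable integrability assumptions the two quantities coincide (cf.\ Lemma~\ref{lem:fDiv_compProd_left}).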


\begin{lemma}
\label{lem:condFDiv_nonneg}
\lean{ProbabilityTheory.condFDiv_nonneg}
@@ -366,6 +380,7 @@ \subsection{Conditional f-divergence}
Apply Lemma~\ref{lem:fDiv_nonneg}.
\end{proof}


\begin{lemma}
\label{lem:condFDiv_const}
\lean{ProbabilityTheory.condFDiv_const}
@@ -383,6 +398,7 @@ \subsection{Conditional f-divergence}
= D_f(\mu, \nu) \xi (\mathcal X)$$
\end{proof}


% TODO: this statement does not match the Lean code: in particular, only one measure is mentioned here while the Lean statement involves two, and the rest of the statement differs as well.
\begin{lemma}
\label{lem:integrable_fDiv_compProd_iff}
@@ -408,6 +424,7 @@ \subsection{Conditional f-divergence}
TODO
\end{proof}


\begin{lemma}
\label{lem:condFDiv_ne_top_iff}
\lean{ProbabilityTheory.condFDiv_ne_top_iff}
@@ -426,6 +443,7 @@ \subsection{Conditional f-divergence}
\uses{lem:integrable_fDiv_compProd_iff}
\end{proof}


\begin{lemma}
\label{lem:fDiv_compProd_ne_top_iff}
\lean{ProbabilityTheory.condFDiv_ne_top_iff_fDiv_compProd_ne_top}
@@ -439,6 +457,7 @@ \subsection{Conditional f-divergence}
\uses{lem:integrable_fDiv_compProd_iff, lem:condFDiv_ne_top_iff}
\end{proof}


\begin{lemma}
\label{lem:fDiv_compProd_left}
\lean{ProbabilityTheory.fDiv_compProd_left}
@@ -473,6 +492,52 @@ \subsection{Conditional f-divergence}




TODO: find a way to hide the following dummy lemma.
\begin{lemma}[Dummy lemma: fDiv properties]
\label{lem:fDiv_properties}
%\lean{}
\leanok
\uses{def:fDiv, def:condFDiv}
Dummy node to summarize properties of $f$-divergences.
\end{lemma}

\begin{proof}\leanok
\uses{
lem:fDiv_ne_top_iff,
lem:fDiv_const,
lem:fDiv_self,
lem:fDiv_eq_zero_iff,
lem:fDiv_id,
lem:fDiv_mul,
lem:fDiv_add,
lem:fDiv_add_linear,
lem:fDiv_map_measurableEmbedding,
lem:fDiv_symm,
lem:fDiv_add_smul_left,
lem:fDiv_add_smul_right,
lem:fDiv_add_smul_right',
lem:fDiv_absolutelyContinuous_add_mutuallySingular,
lem:fDiv_add_measure_le,
lem:fDiv_eq_add_withDensity_derivAtTop,
lem:le_fDiv,
lem:fDiv_nonneg,
lem:condFDiv_nonneg,
lem:condFDiv_const,
lem:condFDiv_ne_top_iff,
lem:fDiv_compProd_ne_top_iff,
lem:fDiv_compProd_left
}
\end{proof}









\subsection{Integral representation of $f$-divergences}


@@ -918,7 +983,7 @@ \subsubsection{Kernel proof}
&= D(\mu, \nu)
\: .
\end{align*}
-The main drawback of this approach is that it requires the measurable spaces to be standard Borel.
+The main drawback of this approach is that it requires the existence of the Bayesian inverse, which does not hold in general; it does exist, for example, when the measurable spaces are standard Borel.

The inequality $D_f(\mu, \nu) \le D_f(\mu \otimes \kappa, \nu \otimes \eta)$ is obtained by describing the Radon-Nikodym derivative $\frac{d(\mu \otimes \kappa)}{d(\nu \otimes \eta)}$ and using convexity properties of the function $f$.
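To sketch that ingredient (under the simplifying assumptions that $\mu \ll \nu$, that $\kappa$ and $\eta$ are Markov kernels, and that $\kappa(x) \ll \eta(x)$ for $\nu$-almost every $x$), the Radon-Nikodym derivative factorizes: for $(\nu \otimes \eta)$-almost every $(x, y)$,
\begin{align*}
\frac{d(\mu \otimes \kappa)}{d(\nu \otimes \eta)}(x, y) = \frac{d\mu}{d\nu}(x) \, \frac{d\kappa(x)}{d\eta(x)}(y) \: .
\end{align*}
Jensen's inequality applied in the $y$ variable to the convex function $f$ then gives
\begin{align*}
D_f(\mu \otimes \kappa, \nu \otimes \eta)
&= \int_{\mathcal X} \int_{\mathcal Y} f\left( \frac{d\mu}{d\nu}(x) \frac{d\kappa(x)}{d\eta(x)}(y) \right) d\eta(x)(y) \, d\nu(x)
\\
&\ge \int_{\mathcal X} f\left( \frac{d\mu}{d\nu}(x) \, \kappa(x)(\mathcal Y) \right) d\nu(x)
= D_f(\mu, \nu) \: ,
\end{align*}
since $\int_{\mathcal Y} \frac{d\kappa(x)}{d\eta(x)}(y) \, d\eta(x)(y) = \kappa(x)(\mathcal Y) = 1$ for $\nu$-almost every $x$.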

@@ -1287,7 +1352,6 @@ \subsection{Convexity}




\subsection{Variational representations}

TODO
@@ -1494,31 +1558,31 @@ \subsection{Statistical divergences}
This is a reformulation of Theorem~\ref{thm:statInfo_eq_integral}.
\end{proof}

-\begin{lemma}
-\label{lem:deGrootInfo_eq_fDiv}
-%\lean{}
-%\leanok
-\uses{def:deGrootInfo, def:statInfoFun, def:fDiv}
-On probability measures, the DeGroot statistical information $I_\pi$ is an $f$-divergence for the function $\phi_{\pi, 1-\pi}$ of Definition~\ref{def:statInfoFun}.
-\end{lemma}
-
-\begin{proof}%\leanok
-\uses{lem:deGrootInfo_eq_integral}
-This is a reformulation of Lemma~\ref{lem:deGrootInfo_eq_integral}.
-\end{proof}
-
-\begin{lemma}
-\label{lem:eGamma_eq_fDiv}
-%\lean{}
-%\leanok
-\uses{def:eGamma, def:statInfoFun, def:fDiv}
-On probability measures, the hockey-stick divergence $E_\gamma$ is an $f$-divergence for the function $\phi_{1,\gamma}$ of Definition~\ref{def:statInfoFun}.
-\end{lemma}
-
-\begin{proof}%\leanok
-\uses{lem:eGamma_eq_integral}
-This is a reformulation of Lemma~\ref{lem:eGamma_eq_integral}.
-\end{proof}
+% \begin{lemma}
+% \label{lem:deGrootInfo_eq_fDiv}
+% %\lean{}
+% %\leanok
+% \uses{def:deGrootInfo, def:statInfoFun, def:fDiv}
+% On probability measures, the DeGroot statistical information $I_\pi$ is an $f$-divergence for the function $\phi_{\pi, 1-\pi}$ of Definition~\ref{def:statInfoFun}.
+% \end{lemma}
+
+% \begin{proof}%\leanok
+% \uses{lem:deGrootInfo_eq_integral}
+% This is a reformulation of Lemma~\ref{lem:deGrootInfo_eq_integral}.
+% \end{proof}
+
+% \begin{lemma}
+% \label{lem:eGamma_eq_fDiv}
+% %\lean{}
+% %\leanok
+% \uses{def:eGamma, def:statInfoFun, def:fDiv}
+% On probability measures, the hockey-stick divergence $E_\gamma$ is an $f$-divergence for the function $\phi_{1,\gamma}$ of Definition~\ref{def:statInfoFun}.
+% \end{lemma}
+
+% \begin{proof}%\leanok
+% \uses{lem:eGamma_eq_integral}
+% This is a reformulation of Lemma~\ref{lem:eGamma_eq_integral}.
+% \end{proof}

\begin{lemma}
\label{lem:tv_eq_fDiv}
2 changes: 1 addition & 1 deletion blueprint/src/sections/hellinger.tex
@@ -6,7 +6,7 @@ \section{Hellinger distance}
\label{def:Hellinger}
\lean{ProbabilityTheory.sqHellinger}
\leanok
-\uses{def:fDiv}
+\uses{def:fDiv, lem:fDiv_properties}
Let $\mu, \nu$ be two measures. The squared Hellinger distance between $\mu$ and $\nu$ is
\begin{align*}
\Hsq(\mu, \nu) = D_f(\mu, \nu) \quad \text{with } f: x \mapsto \frac{1}{2}\left( 1 - \sqrt{x} \right)^2 \: .
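As a quick consistency check (a standard computation, assuming $\mu \ll \nu$ are probability measures with densities $p$ and $q$ with respect to a common dominating measure $\lambda$):
\begin{align*}
\Hsq(\mu, \nu) = \frac{1}{2} \int \left( 1 - \sqrt{\frac{p}{q}} \right)^2 q \, d\lambda = \frac{1}{2} \int \left( \sqrt{q} - \sqrt{p} \right)^2 d\lambda \: ,
\end{align*}
which is the usual form of the squared Hellinger distance.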