Merge remote-tracking branch 'upstream/master' into TestLeanCopilot
LorenzoLuccioli committed Aug 29, 2024
2 parents aa32fcb + 7dbb831 commit 8a50cba
Showing 15 changed files with 950 additions and 334 deletions.
2 changes: 1 addition & 1 deletion blueprint/src/content.tex
@@ -48,4 +48,4 @@ \chapter{The risk of simple binary hypothesis testing}
\input{sections/kernels}
\input{sections/rn_deriv}
\input{sections/convex}
-\input{sections/deficiency}
+%\input{sections/deficiency}
1 change: 1 addition & 0 deletions blueprint/src/macros/print.tex
@@ -13,6 +13,7 @@
% Dummy macros that make sense only for web version.
\newcommand{\lean}[1]{}
\newcommand{\leanok}{}
+\newcommand{\mathlibok}{}
% Make sure that arguments of \uses and \proves are real labels, by using invisible refs:
% latex prints a warning if the label is not defined, but nothing is shown in the pdf file.
% It uses LaTeX3 programming; this is why we use the expl3 package.
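% For illustration, a minimal sketch of such an invisible-ref macro in expl3
% (an assumption about the technique, not this blueprint's actual code; the
% name \usesSketch is hypothetical): each argument is typeset into a box that
% is never used, so an undefined label still triggers the usual LaTeX warning
% while nothing appears in the output.
\ExplSyntaxOn
\NewDocumentCommand{\usesSketch}{m}
  { \clist_map_inline:nn {#1} { \sbox0{ \ref{##1} } } }
\ExplSyntaxOff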
5 changes: 4 additions & 1 deletion blueprint/src/sections/chernoff.tex
@@ -4,7 +4,7 @@ \section{Chernoff divergence}
\label{def:Chernoff}
\lean{ProbabilityTheory.chernoffDiv}
\leanok
-\uses{def:Renyi}
+\uses{def:Renyi, lem:renyi_properties}
The Chernoff divergence of order $\alpha > 0$ between two measures $\mu$ and $\nu$ on $\mathcal X$ is
\begin{align*}
C_\alpha(\mu, \nu) = \inf_{\xi \in \mathcal P(\mathcal X)}\max\{R_\alpha(\xi, \mu), R_\alpha(\xi, \nu)\} \: .
@@ -28,6 +28,7 @@ \section{Chernoff divergence}
This is $R_1 = \KL$ (by definition).
\end{proof}
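As a quick sanity check of Definition~\ref{def:Chernoff} (our own example, using only the standard facts that $R_\alpha \ge 0$ on probability measures and that $R_\alpha(\mu, \mu) = 0$): for a probability measure $\mu = \nu$, the choice $\xi = \mu$ gives
\begin{align*}
C_\alpha(\mu, \mu) \le \max\{R_\alpha(\mu, \mu), R_\alpha(\mu, \mu)\} = 0 \: ,
\end{align*}
and since the quantity under the infimum is nonnegative for every $\xi$, in fact $C_\alpha(\mu, \mu) = 0$.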


\begin{lemma}[Symmetry]
\label{lem:chernoff_symm}
%\lean{}
@@ -40,6 +41,7 @@ \section{Chernoff divergence}
Immediate from the definition.
\end{proof}


\begin{lemma}[Monotonicity]
\label{lem:chernoff_mono}
%\lean{}
@@ -53,6 +55,7 @@ \section{Chernoff divergence}
Consequence of Lemma~\ref{lem:renyi_monotone}.
\end{proof}


\begin{lemma}
\label{lem:chernoff_eq_max_renyi}
%\lean{}
118 changes: 91 additions & 27 deletions blueprint/src/sections/f_divergence.tex
@@ -19,6 +19,7 @@ \subsection{Definition and basic properties}
if $x \mapsto f\left(\frac{d \mu}{d \nu}(x)\right)$ is $\nu$-integrable and $+\infty$ otherwise.
\end{definition}
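For intuition, two standard examples (stated for probability measures with $\mu \ll \nu$): the choice $f : x \mapsto x \log x$ recovers the Kullback-Leibler divergence, and $f : x \mapsto \frac{1}{2} \vert x - 1 \vert$ recovers the total variation distance (cf.\ Lemma~\ref{lem:tv_eq_fDiv}).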


\begin{lemma}
\label{lem:fDiv_ne_top_iff}
\lean{ProbabilityTheory.fDiv_ne_top_iff}
@@ -46,6 +47,7 @@ \subsection{Definition and basic properties}
Compute the integral.
\end{proof}


\begin{lemma}
\label{lem:fDiv_self}
\lean{ProbabilityTheory.fDiv_self}
Expand Down Expand Up @@ -87,6 +89,7 @@ \subsection{Definition and basic properties}
$D_{x\mapsto x}(\mu, \nu) = (\frac{d\mu}{d\nu}\cdot \nu)(\mathcal X) + \mu_{\perp \nu}(\mathcal X) = \mu (\mathcal X)$, where the last equality is the Lebesgue decomposition $\mu = \frac{d\mu}{d\nu}\cdot \nu + \mu_{\perp \nu}$.
\end{proof}


\begin{lemma}
\label{lem:fDiv_mul}
\lean{ProbabilityTheory.fDiv_mul}
@@ -99,6 +102,7 @@ \subsection{Definition and basic properties}
Linearity of the integral.
\end{proof}


\begin{lemma}
\label{lem:fDiv_add}
\lean{ProbabilityTheory.fDiv_add}
@@ -111,6 +115,7 @@ \subsection{Definition and basic properties}
Linearity of the integral.
\end{proof}


\begin{lemma}
\label{lem:fDiv_add_linear}
\lean{ProbabilityTheory.fDiv_add_linear}
@@ -124,6 +129,7 @@ \subsection{Definition and basic properties}
Linearity (Lemmas~\ref{lem:fDiv_add} and~\ref{lem:fDiv_mul}), then Lemmas~\ref{lem:fDiv_const} and~\ref{lem:fDiv_id}.
\end{proof}


\begin{lemma}
\label{lem:fDiv_map_measurableEmbedding}
\lean{ProbabilityTheory.fDiv_map_measurableEmbedding}
@@ -223,6 +229,7 @@ \subsection{Definition and basic properties}
$\frac{d(\mu_1 + \mu_2)}{d \nu} = \frac{d \mu_1}{d \nu}$ a.e. and $(\mu_1 + \mu_2)_{\perp \nu} = \mu_2$; both equalities follow from the uniqueness of the Lebesgue decomposition, since $\mu_1 \ll \nu$ and $\mu_2 \perp \nu$.
\end{proof}


\begin{lemma}[Superseded by Lemma~\ref{lem:fDiv_add_measure_le}]
\label{lem:fDiv_add_measure_le_of_ac}
\lean{ProbabilityTheory.fDiv_add_measure_le_of_ac}
@@ -244,6 +251,7 @@ \subsection{Definition and basic properties}
\end{align*}
\end{proof}


\begin{lemma}
\label{lem:fDiv_add_measure_le}
\lean{ProbabilityTheory.fDiv_add_measure_le}
@@ -267,6 +275,7 @@ \subsection{Definition and basic properties}
\end{align*}
\end{proof}


\begin{lemma}
\label{lem:fDiv_eq_add_withDensity_derivAtTop}
\lean{ProbabilityTheory.fDiv_eq_add_withDensity_derivAtTop}
@@ -281,6 +290,7 @@ \subsection{Definition and basic properties}
Apply Lemma~\ref{lem:fDiv_absolutelyContinuous_add_mutuallySingular} to $\frac{d\mu}{d\nu}\cdot \nu$ and $\mu_{\perp \nu}$: by the Lebesgue decomposition these two measures sum to $\mu$, the first is absolutely continuous with respect to $\nu$ and the second is mutually singular with $\nu$.
\end{proof}


\begin{lemma}[Superseded by Lemma~\ref{lem:le_fDiv}]
\label{lem:le_fDiv_of_ac}
\lean{ProbabilityTheory.le_fDiv_of_ac}
@@ -301,6 +311,7 @@ \subsection{Definition and basic properties}

\end{proof}


\begin{lemma}
\label{lem:le_fDiv}
\lean{ProbabilityTheory.le_fDiv}
@@ -323,6 +334,7 @@ \subsection{Definition and basic properties}
\end{align*}
\end{proof}


\begin{lemma}
\label{lem:fDiv_nonneg}
\lean{ProbabilityTheory.fDiv_nonneg}
@@ -337,6 +349,7 @@ \subsection{Definition and basic properties}
\end{proof}



\subsection{Conditional f-divergence}

TODO: replace the definition by $D(\mu \otimes \kappa, \mu \otimes \eta)$, which allows for a more general proof of the ``conditioning increases divergence'' inequality.
@@ -353,6 +366,7 @@ \subsection{Conditional f-divergence}
if $x \mapsto D_f(\kappa(x), \eta(x))$ is $\mu$-integrable and $+\infty$ otherwise.
\end{definition}
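To make the TODO above concrete (our paraphrase; the notation in the elided lines may differ): the current definition is the average
\begin{align*}
\int_{\mathcal X} D_f(\kappa(x), \eta(x)) \, d\mu(x) \: ,
\end{align*}
finite exactly when the integrand is $\mu$-integrable, while the proposed alternative $D_f(\mu \otimes \kappa, \mu \otimes \eta)$ compares the two composition-product measures directly; under suitable integrability assumptions the two quantities coincide (cf.\ Lemma~\ref{lem:fDiv_compProd_left}).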


\begin{lemma}
\label{lem:condFDiv_nonneg}
\lean{ProbabilityTheory.condFDiv_nonneg}
@@ -366,6 +380,7 @@ \subsection{Conditional f-divergence}
Apply Lemma~\ref{lem:fDiv_nonneg}.
\end{proof}


\begin{lemma}
\label{lem:condFDiv_const}
\lean{ProbabilityTheory.condFDiv_const}
@@ -383,6 +398,7 @@ \subsection{Conditional f-divergence}
= D_f(\mu, \nu) \xi (\mathcal X)$$
\end{proof}


% TODO: this statement does not match the Lean code: in particular, only one measure is mentioned here while the Lean statement involves two, and the rest of the statement differs as well.
\begin{lemma}
\label{lem:integrable_fDiv_compProd_iff}
@@ -408,6 +424,7 @@ \subsection{Conditional f-divergence}
TODO
\end{proof}


\begin{lemma}
\label{lem:condFDiv_ne_top_iff}
\lean{ProbabilityTheory.condFDiv_ne_top_iff}
@@ -426,6 +443,7 @@ \subsection{Conditional f-divergence}
\uses{lem:integrable_fDiv_compProd_iff}
\end{proof}


\begin{lemma}
\label{lem:fDiv_compProd_ne_top_iff}
\lean{ProbabilityTheory.condFDiv_ne_top_iff_fDiv_compProd_ne_top}
@@ -439,6 +457,7 @@ \subsection{Conditional f-divergence}
\uses{lem:integrable_fDiv_compProd_iff, lem:condFDiv_ne_top_iff}
\end{proof}


\begin{lemma}
\label{lem:fDiv_compProd_left}
\lean{ProbabilityTheory.fDiv_compProd_left}
@@ -473,6 +492,52 @@ \subsection{Conditional f-divergence}




TODO: find a way to hide the following dummy lemma.
\begin{lemma}[Dummy lemma: fDiv properties]
\label{lem:fDiv_properties}
%\lean{}
\leanok
\uses{def:fDiv, def:condFDiv}
Dummy node to summarize properties of $f$-divergences.
\end{lemma}

\begin{proof}\leanok
\uses{
lem:fDiv_ne_top_iff,
lem:fDiv_const,
lem:fDiv_self,
lem:fDiv_eq_zero_iff,
lem:fDiv_id,
lem:fDiv_mul,
lem:fDiv_add,
lem:fDiv_add_linear,
lem:fDiv_map_measurableEmbedding,
lem:fDiv_symm,
lem:fDiv_add_smul_left,
lem:fDiv_add_smul_right,
lem:fDiv_add_smul_right',
lem:fDiv_absolutelyContinuous_add_mutuallySingular,
lem:fDiv_add_measure_le,
lem:fDiv_eq_add_withDensity_derivAtTop,
lem:le_fDiv,
lem:fDiv_nonneg,
lem:condFDiv_nonneg,
lem:condFDiv_const,
lem:condFDiv_ne_top_iff,
lem:fDiv_compProd_ne_top_iff,
lem:fDiv_compProd_left
}
\end{proof}









\subsection{Integral representation of $f$-divergences}


@@ -918,7 +983,7 @@ \subsubsection{Kernel proof}
&= D(\mu, \nu)
\: .
\end{align*}
-The main drawback of this approach is that it requires the measurable spaces to be standard Borel.
+The main drawback of this approach is that it requires the existence of the Bayesian inverse, which does not hold in general; it does exist, for example, when the measurable spaces are standard Borel.

The inequality $D_f(\mu, \nu) \le D_f(\mu \otimes \kappa, \nu \otimes \eta)$ is obtained by describing the Radon-Nikodym derivative $\frac{d(\mu \otimes \kappa)}{d(\nu \otimes \eta)}$ and using convexity properties of the function $f$.
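To sketch that ingredient (under the simplifying assumptions that $\mu \ll \nu$, that $\kappa$ and $\eta$ are Markov kernels, and that $\kappa(x) \ll \eta(x)$ for $\nu$-almost every $x$), the Radon-Nikodym derivative factorizes: for $(\nu \otimes \eta)$-almost every $(x, y)$,
\begin{align*}
\frac{d(\mu \otimes \kappa)}{d(\nu \otimes \eta)}(x, y) = \frac{d\mu}{d\nu}(x) \, \frac{d\kappa(x)}{d\eta(x)}(y) \: .
\end{align*}
Jensen's inequality applied in the $y$ variable to the convex function $f$ then gives
\begin{align*}
D_f(\mu \otimes \kappa, \nu \otimes \eta)
&= \int_{\mathcal X} \int_{\mathcal Y} f\left( \frac{d\mu}{d\nu}(x) \frac{d\kappa(x)}{d\eta(x)}(y) \right) d\eta(x)(y) \, d\nu(x)
\\
&\ge \int_{\mathcal X} f\left( \frac{d\mu}{d\nu}(x) \, \kappa(x)(\mathcal Y) \right) d\nu(x)
= D_f(\mu, \nu) \: ,
\end{align*}
since $\int_{\mathcal Y} \frac{d\kappa(x)}{d\eta(x)}(y) \, d\eta(x)(y) = \kappa(x)(\mathcal Y) = 1$ for $\nu$-almost every $x$.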

@@ -1287,7 +1352,6 @@ \subsection{Convexity}




\subsection{Variational representations}

TODO
@@ -1494,31 +1558,31 @@ \subsection{Statistical divergences}
This is a reformulation of Theorem~\ref{thm:statInfo_eq_integral}.
\end{proof}

-\begin{lemma}
-\label{lem:deGrootInfo_eq_fDiv}
-%\lean{}
-%\leanok
-\uses{def:deGrootInfo, def:statInfoFun, def:fDiv}
-On probability measures, the DeGroot statistical information $I_\pi$ is an $f$-divergence for the function $\phi_{\pi, 1-\pi}$ of Definition~\ref{def:statInfoFun}.
-\end{lemma}
-
-\begin{proof}%\leanok
-\uses{lem:deGrootInfo_eq_integral}
-This is a reformulation of Lemma~\ref{lem:deGrootInfo_eq_integral}.
-\end{proof}
-
-\begin{lemma}
-\label{lem:eGamma_eq_fDiv}
-%\lean{}
-%\leanok
-\uses{def:eGamma, def:statInfoFun, def:fDiv}
-On probability measures, the hockey-stick divergence $E_\gamma$ is an $f$-divergence for the function $\phi_{1,\gamma}$ of Definition~\ref{def:statInfoFun}.
-\end{lemma}
-
-\begin{proof}%\leanok
-\uses{lem:eGamma_eq_integral}
-This is a reformulation of Lemma~\ref{lem:eGamma_eq_integral}.
-\end{proof}
+% \begin{lemma}
+% \label{lem:deGrootInfo_eq_fDiv}
+% %\lean{}
+% %\leanok
+% \uses{def:deGrootInfo, def:statInfoFun, def:fDiv}
+% On probability measures, the DeGroot statistical information $I_\pi$ is an $f$-divergence for the function $\phi_{\pi, 1-\pi}$ of Definition~\ref{def:statInfoFun}.
+% \end{lemma}
+
+% \begin{proof}%\leanok
+% \uses{lem:deGrootInfo_eq_integral}
+% This is a reformulation of Lemma~\ref{lem:deGrootInfo_eq_integral}.
+% \end{proof}
+
+% \begin{lemma}
+% \label{lem:eGamma_eq_fDiv}
+% %\lean{}
+% %\leanok
+% \uses{def:eGamma, def:statInfoFun, def:fDiv}
+% On probability measures, the hockey-stick divergence $E_\gamma$ is an $f$-divergence for the function $\phi_{1,\gamma}$ of Definition~\ref{def:statInfoFun}.
+% \end{lemma}
+
+% \begin{proof}%\leanok
+% \uses{lem:eGamma_eq_integral}
+% This is a reformulation of Lemma~\ref{lem:eGamma_eq_integral}.
+% \end{proof}

\begin{lemma}
\label{lem:tv_eq_fDiv}
2 changes: 1 addition & 1 deletion blueprint/src/sections/hellinger.tex
@@ -6,7 +6,7 @@ \section{Hellinger distance}
\label{def:Hellinger}
\lean{ProbabilityTheory.sqHellinger}
\leanok
-\uses{def:fDiv}
+\uses{def:fDiv, lem:fDiv_properties}
Let $\mu, \nu$ be two measures. The squared Hellinger distance between $\mu$ and $\nu$ is
\begin{align*}
\Hsq(\mu, \nu) = D_f(\mu, \nu) \quad \text{with } f: x \mapsto \frac{1}{2}\left( 1 - \sqrt{x} \right)^2 \: .
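As a quick consistency check (a standard computation, assuming $\mu \ll \nu$ are probability measures with densities $p$ and $q$ with respect to a common dominating measure $\lambda$):
\begin{align*}
\Hsq(\mu, \nu) = \frac{1}{2} \int \left( 1 - \sqrt{\frac{p}{q}} \right)^2 q \, d\lambda = \frac{1}{2} \int \left( \sqrt{q} - \sqrt{p} \right)^2 d\lambda \: ,
\end{align*}
which is the usual form of the squared Hellinger distance.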