diff --git a/PFR/MoreRuzsaDist.lean b/PFR/MoreRuzsaDist.lean index c2617063..9f95c12f 100644 --- a/PFR/MoreRuzsaDist.lean +++ b/PFR/MoreRuzsaDist.lean @@ -123,22 +123,19 @@ section multiDistance open Filter Function MeasureTheory Measure ProbabilityTheory open scoped BigOperators -variable {Ω G : Type*} - [mΩ : MeasurableSpace Ω] {μ : Measure Ω} +variable {G : Type*} [hG : MeasurableSpace G] [MeasurableSingletonClass G] [AddCommGroup G] [MeasurableSub₂ G] [MeasurableAdd₂ G] [Countable G] -variable {X : Ω → G} {Y : Ω' → G} {Z : Ω'' → G} [FiniteRange X] [FiniteRange Y] [FiniteRange Z] - /-- Let $X_{[m]} = (X_i)_{1 \leq i \leq m}$ non-empty finite tuple of $G$-valued random variables $X_i$. Then we define \[ D[X_{[m]}] := \bbH[\sum_{i=1}^m \tilde X_i] - \frac{1}{m} \sum_{i=1}^m \bbH[\tilde X_i], \] where the $\tilde X_i$ are independent copies of the $X_i$.-/ noncomputable -def multiDist {m:ℕ} (X : Fin m → Ω → G) (μ : Measure Ω := by volume_tac) : ℝ := sorry +def multiDist {m:ℕ} {Ω: Fin m → Type*} (hΩ: (i:Fin m) → MeasureSpace (Ω i)) (X : (i:Fin m) → (Ω i) → G) : ℝ := sorry -@[inherit_doc multiDist] notation3:max "D[" X " ; " μ "]" => multiDist X μ +@[inherit_doc multiDist] notation3:max "D[" X " ; " hΩ "]" => multiDist hΩ X /-- For any such tuple, we have $D[X_{[m]}] \geq 0$. -/ lemma multiDist_nonneg : 0 = 1 := by sorry @@ -164,4 +161,21 @@ lemma multidist_ruzsa_IV : 0 = 1 := by sorry /-- If $D[X_{[m]}]=0$, then for each $i \in I$ there is a finite subgroup $H_i \leq G$ such that $d[X_i; U_{H_i}] = 0$. -/ lemma multidist_eq_zero : 0 = 1 := by sorry +/-- If $X_{[m]} = (X_i)_{1 \leq i \leq m}$ and $Y_{[m]} = (Y_i)_{1 \leq i \leq m}$ are tuples of random variables, with the $X_i$ being $G$-valued (but the $Y_i$ need not be), then we define + \begin{equation}\label{multi-def-cond} + D[ X_{[m]} | Y_{[m]}] := \bbH[\sum_{i=1}^m \tilde X_i \big| (\tilde Y_j)_{1 \leq j \leq m} ] - \frac{1}{m} \sum_{i=1}^m \bbH[ \tilde X_i | \tilde Y_i] + \end{equation} + where $(\tilde X_i,\tilde Y_i)$, $1 \leq i \leq m$ are independent copies of $(X_i,Y_i), 1 \leq i \leq m$ (but note here that we do \emph{not} assume $X_i$ are independent of $Y_i$, or $\tilde X_i$ independent of $\tilde Y_i$). -/ +noncomputable +def condMultiDist {m:ℕ} {Ω: Fin m → Type*} (hΩ: (i:Fin m) → MeasureSpace (Ω i)) (X : (i:Fin m) → (Ω i) → G) (Y : (i:Fin m) → (Ω i) → G) : ℝ := sorry + +@[inherit_doc multiDist] notation3:max "D[" X " | " Y " ; " hΩ "]" => condMultiDist hΩ X Y + +/-- With the above notation, we have + \begin{equation}\label{multi-def-cond-alt} + D[ X_{[m]} | Y_{[m]} ] = \sum_{(y_i)_{1 \leq i \leq m}} \biggl(\prod_{1 \leq i \leq m} p_{Y_i}(y_i)\biggr) D[ (X_i \,|\, Y_i \mathop{=}y_i)_{1 \leq i \leq m}] + \end{equation} + where each $y_i$ ranges over the support of $p_{Y_i}$ for $1 \leq i \leq m$. -/ +lemma condMultiDist_eq : 0 = 1 := by sorry + end multiDistance diff --git a/PFR/MultiTauFunctional.lean b/PFR/MultiTauFunctional.lean new file mode 100644 index 00000000..0489f834 --- /dev/null +++ b/PFR/MultiTauFunctional.lean @@ -0,0 +1,115 @@ +import PFR.MoreRuzsaDist + +/-! +# The tau functional for multidistance + +Definition of the tau functional and basic facts + +## Main definitions: + + +## Main results + +-/ + +open MeasureTheory ProbabilityTheory +universe uG + +variable (Ω₀ : Type*) [MeasureSpace Ω₀] +[IsProbabilityMeasure (ℙ : Measure Ω₀)] +variable (G : Type uG) [AddCommGroup G] [Fintype G] [MeasurableSpace G] + +/-- A structure that packages all the fixed information in the main argument. -/ +structure multiRefPackage := + X₀ : Ω₀ → G + hmeas : Measurable X₀ + m : ℕ + η : ℝ + hη : 0 < η + +variable (p : multiRefPackage Ω₀ G) +variable {Ω₀ G} + +variable {Ω₁ Ω₂ Ω'₁ Ω'₂ : Type*} + + +/-- If $(X_i)_{1 \leq i \leq m}$ is a tuple, we define its $\tau$-functional +$$ \tau[ (X_i)_{1 \leq i \leq m}] := D[(X_i)_{1 \leq i \leq m}] + \eta \sum_{i=1}^m d[X_i; X^0].$$ +-/ +@[pp_dot] noncomputable def multiTau {Ω : Fin p.m → Type*} (hΩ: (i: Fin p.m) ↦ MeasureSpace (Ω i)) (X: (i: Fin p.m) → (Ω i) → G) : ℝ := sorry + +@[inherit_doc tau] +notation3:max "τ[" X " ; " hΩ " | " p"]" => multiTau p X hΩ + + +/-- A $\tau$-minimizer is a tuple $(X_i)_{1 \leq i \leq m}$ that minimizes the $\tau$-functional among all tuples of $G$-valued random variables. -/ +def multiTau_minimizes {Ω : Type*} (hΩ: (i: Fin p.m) ↦ MeasureSpace (Ω i)) (X: (i: Fin p.m) → (Ω i) → G) : Prop := sorry + +/-- If $G$ is finite, then a $\tau$-minimizer exists. -/ +lemma multiTau_min_exists : 0 = 1 := by sorry + +/-- If $(X_i)_{1 \leq i \leq m}$ is a $\tau$-minimizer, then $\sum_{i=1}^m d[X_i; X^0] \leq \frac{2m}{\eta} d[X^0; X^0]$. -/ +lemma multiTau_min_sum_le : 0 = 1 := by sorry + +/-- If $(X_i)_{1 \leq i \leq m}$ is a $\tau$-minimizer, and $k := D[(X_i)_{1 \leq i \leq m}]$, then for any other tuple $(X'_i)_{1 \leq i \leq m}$, one has + $$ k - D[(X'_i)_{1 \leq i \leq m}] \leq \eta \sum_{i=1}^m d[X_i; X'_i].$$ +-/ +lemma sub_multiDistance_le : 0 = 1 := by sorry + +/-- If $(X_i)_{1 \leq i \leq m}$ is a $\tau$-minimizer, and $k := D[(X_i)_{1 \leq i \leq m}]$, then for any other tuples $(X'_i)_{1 \leq i \leq m}$ and $(Y_i)_{1 \leq i \leq m}$ with the $X'_i$ $G$-valued, one has + $$ k - D[(X'_i)_{1 \leq i \leq m} | (Y_i)_{1 \leq i \leq m}] \leq \eta \sum_{i=1}^m d[X_i; X'_i|Y_i].$$ -/ +lemma sub_condMultiDistance_le : 0 = 1 := by sorry + +/-- With the notation of the previous lemma, we have + \begin{equation}\label{5.3-conv} + k - D[ X'_{[m]} | Y_{[m]} ] \leq \eta \sum_{i=1}^m d[X_{\sigma(i)};X'_i|Y_i] + \end{equation} +for any permutation $\sigma : \{1,\dots,m\} \rightarrow \{1,\dots,m\}$. -/ +lemma sub_condMultiDistance_le' : 0 = 1 := by sorry + +open BigOperators + +/-- For any $G$-valued random variables $X'_1,X'_2$ and random variables $Z,W$, one can lower +bound $d[X'_1|Z;X'_2|W]$ by +$$k - \eta (d[X^0_1;X'_1|Z] - d[X^0_1;X_1] ) - \eta (d[X^0_2;X'_2|W] - d[X^0_2;X_2] ).$$ +-/ +lemma condRuzsaDistance_ge_of_min [MeasurableSingletonClass G] + [Fintype S] [MeasurableSpace S] [MeasurableSingletonClass S] + [Fintype T] [MeasurableSpace T] [MeasurableSingletonClass T] + (h : tau_minimizes p X₁ X₂) (h1 : Measurable X'₁) (h2 : Measurable X'₂) + (Z : Ω'₁ → S) (W : Ω'₂ → T) (hZ : Measurable Z) (hW : Measurable W) : + d[X₁ # X₂] - p.η * (d[p.X₀₁ # X'₁ | Z] - d[p.X₀₁ # X₁]) + - p.η * (d[p.X₀₂ # X'₂ | W] - d[p.X₀₂ # X₂]) ≤ d[X'₁ | Z # X'₂ | W] := by + have hz (a : ℝ) : a = ∑ z in FiniteRange.toFinset Z, (ℙ (Z ⁻¹' {z})).toReal * a := by + simp_rw [← Finset.sum_mul,← Measure.map_apply hZ (MeasurableSet.singleton _), Finset.sum_toReal_measure_singleton] + rw [FiniteRange.full hZ] + simp + have hw (a : ℝ) : a = ∑ w in FiniteRange.toFinset W, (ℙ (W ⁻¹' {w})).toReal * a := by + simp_rw [← Finset.sum_mul,← Measure.map_apply hW (MeasurableSet.singleton _), Finset.sum_toReal_measure_singleton] + rw [FiniteRange.full hW] + simp + rw [condRuzsaDist_eq_sum h1 hZ h2 hW, condRuzsaDist'_eq_sum h1 hZ, hz d[X₁ # X₂], + hz d[p.X₀₁ # X₁], hz (p.η * (d[p.X₀₂ # X'₂ | W] - d[p.X₀₂ # X₂])), + ← Finset.sum_sub_distrib, Finset.mul_sum, ← Finset.sum_sub_distrib, ← Finset.sum_sub_distrib] + apply Finset.sum_le_sum + intro z _ + rw [condRuzsaDist'_eq_sum h2 hW, hw d[p.X₀₂ # X₂], + hw ((ℙ (Z ⁻¹' {z})).toReal * d[X₁ # X₂] - p.η * ((ℙ (Z ⁻¹' {z})).toReal * + d[p.X₀₁ ; ℙ # X'₁ ; ℙ[|Z ← z]] - (ℙ (Z ⁻¹' {z})).toReal * d[p.X₀₁ # X₁])), + ← Finset.sum_sub_distrib, Finset.mul_sum, Finset.mul_sum, ← Finset.sum_sub_distrib] + apply Finset.sum_le_sum + intro w _ + rcases eq_or_ne (ℙ (Z ⁻¹' {z})) 0 with hpz | hpz + . simp [hpz] + rcases eq_or_ne (ℙ (W ⁻¹' {w})) 0 with hpw | hpw + . simp [hpw] + set μ := (hΩ₁.volume)[|Z ← z] + have hμ : IsProbabilityMeasure μ := cond_isProbabilityMeasure ℙ hpz + set μ' := ℙ[|W ← w] + have hμ' : IsProbabilityMeasure μ' := cond_isProbabilityMeasure ℙ hpw + suffices d[X₁ # X₂] - p.η * (d[p.X₀₁; volume # X'₁; μ] - d[p.X₀₁ # X₁]) - + p.η * (d[p.X₀₂; volume # X'₂; μ'] - d[p.X₀₂ # X₂]) ≤ d[X'₁ ; μ # X'₂; μ'] by + replace this := mul_le_mul_of_nonneg_left this (show 0 ≤ (ℙ (Z ⁻¹' {z})).toReal * (ℙ (W ⁻¹' {w})).toReal by positivity) + convert this using 1 + ring + exact distance_ge_of_min' p h h1 h2 diff --git a/blueprint/src/chapter/torsion.tex b/blueprint/src/chapter/torsion.tex index 48549a14..bba1eadb 100644 --- a/blueprint/src/chapter/torsion.tex +++ b/blueprint/src/chapter/torsion.tex @@ -258,24 +258,24 @@ \subsection{The tau functional} Fix $m \geq 2$, and a reference variable $X^0$ in $G$. -\begin{definition}[$\eta$]\label{eta-def-multi} +\begin{definition}[$\eta$]\label{eta-def-multi}\lean{multiRefPackage}\leanok We set $\eta := \frac{1}{32m^3}$. \end{definition} -\begin{definition}[$\tau$-functional]\label{tau-def-multi}\uses{eta-def-multi} If $(X_i)_{1 \leq i \leq m}$ is a tuple, we define its $\tau$-functional +\begin{definition}[$\tau$-functional]\label{tau-def-multi}\uses{eta-def-multi}\lean{multiTau} If $(X_i)_{1 \leq i \leq m}$ is a tuple, we define its $\tau$-functional $$ \tau[ (X_i)_{1 \leq i \leq m}] := D[(X_i)_{1 \leq i \leq m}] + \eta \sum_{i=1}^m d[X_i; X^0].$$ \end{definition} -\begin{definition}[$\tau$-minimizer]\label{tau-min-multi}\uses{tau-def-multi} A $\tau$-minimizer is a tuple $(X_i)_{1 \leq i \leq m}$ that minimizes the $\tau$-functional among all tuples of $G$-valued random variables. +\begin{definition}[$\tau$-minimizer]\label{tau-min-multi}\uses{tau-def-multi}\lean{multiTau_minimizes} A $\tau$-minimizer is a tuple $(X_i)_{1 \leq i \leq m}$ that minimizes the $\tau$-functional among all tuples of $G$-valued random variables. \end{definition} -\begin{proposition}[Existence of $\tau$-minimizer]\label{tau-min-exist-multi} If $G$ is finite, then a $\tau$-minimizer exists. +\begin{proposition}[Existence of $\tau$-minimizer]\label{tau-min-exist-multi}\lean{multiTau_min_exists} If $G$ is finite, then a $\tau$-minimizer exists. \end{proposition} \begin{proof}\uses{tau-def-multi} This is similar to the proof of Proposition \ref{tau-min}. \end{proof} -\begin{proposition}[Minimizer close to reference variables]\label{tau-ref} If $(X_i)_{1 \leq i \leq m}$ is a $\tau$-minimizer, then $\sum_{i=1}^m d[X_i; X^0] \leq \frac{2m}{\eta} d[X^0; X^0]$. +\begin{proposition}[Minimizer close to reference variables]\label{tau-ref}\lean{multiTau_min_sum_le} If $(X_i)_{1 \leq i \leq m}$ is a $\tau$-minimizer, then $\sum_{i=1}^m d[X_i; X^0] \leq \frac{2m}{\eta} d[X^0; X^0]$. \end{proposition} \begin{proof}\uses{tau-min-multi, multidist-nonneg, multidist-ruzsa-III} By Definition \ref{tau-min-multi} we have @@ -285,7 +285,7 @@ \subsection{The tau functional} The claim now follows from Lemma \ref{multidist-ruzsa-III}. \end{proof} -\begin{lemma}[Lower bound on multidistance]\label{multidist-lower} If $(X_i)_{1 \leq i \leq m}$ is a $\tau$-minimizer, and $k := D[(X_i)_{1 \leq i \leq m}]$, then for any other tuple $(X'_i)_{1 \leq i \leq m}$, one has +\begin{lemma}[Lower bound on multidistance]\label{multidist-lower}\lean{sub_multiDistance_le} If $(X_i)_{1 \leq i \leq m}$ is a $\tau$-minimizer, and $k := D[(X_i)_{1 \leq i \leq m}]$, then for any other tuple $(X'_i)_{1 \leq i \leq m}$, one has $$ k - D[(X'_i)_{1 \leq i \leq m}] \leq \eta \sum_{i=1}^m d[X_i; X'_i].$$ \end{lemma} @@ -299,14 +299,14 @@ \subsection{The tau functional} The claim follows. \end{proof} -\begin{definition}[Conditional multidistance]\label{cond-multidist-def}\uses{multidist-def} If $X_{[m]} = (X_i)_{1 \leq i \leq m}$ and $Y_{[m]} = (Y_i)_{1 \leq i \leq m}$ are tuples of random variables, with the $X_i$ being $G$-valued (but the $Y_i$ need not be), then we define +\begin{definition}[Conditional multidistance]\label{cond-multidist-def}\uses{multidist-def}\lean{condMultiDist} If $X_{[m]} = (X_i)_{1 \leq i \leq m}$ and $Y_{[m]} = (Y_i)_{1 \leq i \leq m}$ are tuples of random variables, with the $X_i$ being $G$-valued (but the $Y_i$ need not be), then we define \begin{equation}\label{multi-def-cond} D[ X_{[m]} | Y_{[m]}] := \bbH[\sum_{i=1}^m \tilde X_i \big| (\tilde Y_j)_{1 \leq j \leq m} ] - \frac{1}{m} \sum_{i=1}^m \bbH[ \tilde X_i | \tilde Y_i] \end{equation} where $(\tilde X_i,\tilde Y_i)$, $1 \leq i \leq m$ are independent copies of $(X_i,Y_i), 1 \leq i \leq m$ (but note here that we do \emph{not} assume $X_i$ are independent of $Y_i$, or $\tilde X_i$ independent of $\tilde Y_i$). \end{definition} -\begin{lemma}[Alternate form of conditional multidistance]\label{cond-multidist-alt} With the above notation, we have +\begin{lemma}[Alternate form of conditional multidistance]\label{cond-multidist-alt}\lean{condMultiDist_eq} With the above notation, we have \begin{equation}\label{multi-def-cond-alt} D[ X_{[m]} | Y_{[m]} ] = \sum_{(y_i)_{1 \leq i \leq m}} \biggl(\prod_{1 \leq i \leq m} p_{Y_i}(y_i)\biggr) D[ (X_i \,|\, Y_i \mathop{=}y_i)_{1 \leq i \leq m}] \end{equation} @@ -317,7 +317,7 @@ \subsection{The tau functional} This is routine from Definition \ref{conditional-entropy-def} and Definitions \ref{multidist-def} and \ref{cond-multidist-def}. \end{proof} -\begin{lemma}[Lower bound on conditional multidistance]\label{cond-multidist-lower} If $(X_i)_{1 \leq i \leq m}$ is a $\tau$-minimizer, and $k := D[(X_i)_{1 \leq i \leq m}]$, then for any other tuples $(X'_i)_{1 \leq i \leq m}$ and $(Y_i)_{1 \leq i \leq m}$ with the $X'_i$ $G$-valued, one has +\begin{lemma}[Lower bound on conditional multidistance]\label{cond-multidist-lower}\lean{sub_condMultiDistance_le} If $(X_i)_{1 \leq i \leq m}$ is a $\tau$-minimizer, and $k := D[(X_i)_{1 \leq i \leq m}]$, then for any other tuples $(X'_i)_{1 \leq i \leq m}$ and $(Y_i)_{1 \leq i \leq m}$ with the $X'_i$ $G$-valued, one has $$ k - D[(X'_i)_{1 \leq i \leq m} | (Y_i)_{1 \leq i \leq m}] \leq \eta \sum_{i=1}^m d[X_i; X'_i|Y_i].$$ \end{lemma} @@ -325,11 +325,11 @@ \subsection{The tau functional} Immediate from Lemma \ref{multidist-lower}, Lemma \ref{cond-multidist-alt}, and Definition \ref{cond-dist-def}. \end{proof} -\begin{corollary}[Lower bound on conditional multidistance, II]\label{cond-multidist-lower-II} With the notation of the previous lemma, we have +\begin{corollary}[Lower bound on conditional multidistance, II]\label{cond-multidist-lower-II}\lean{sub_condMultiDistance_le'} With the notation of the previous lemma, we have \begin{equation}\label{5.3-conv} k - D[ X'_{[m]} | Y_{[m]} ] \leq \eta \sum_{i=1}^m d[X_{\sigma(i)};X'_i|Y_i] \end{equation} -for any permutation $\sigma : I \rightarrow I$. +for any permutation $\sigma : \{1,\dots,m\} \rightarrow \{1,\dots,m\}$. \end{corollary} \begin{proof}\uses{cond-multidist-lower, multidist-perm} This follows from Lemma \ref{cond-multidist-lower} and Lemma \ref{multidist-perm}.