private-attribution · andyleiserson · Aug 27, 2024 · Aug 15, 2024 · Aug 15, 2024 · Aug 15, 2024
diff --git a/ipa-core/build.rs b/ipa-core/build.rs
@@ -21,6 +21,7 @@ track_steps!(
             prf_sharding::step,
             shuffle::step,
             aggregation::step,
+            oprf_padding::step,
             step,
         },
         dp::step,

diff --git a/ipa-core/src/error.rs b/ipa-core/src/error.rs
@@ -86,6 +86,8 @@ pub enum Error {
     DZKPMasks,
     #[error("Attempt to operate on zero records")]
     ZeroRecords,
+    #[error("DP related error: {0}")]
+    DPPaddingError(String),
     #[error("Epsilon submitted to query is out of bounds")]
     EpsilonOutOfBounds,
     #[error("Missing total records in {0}")]

diff --git a/ipa-core/src/protocol/context/prss.rs b/ipa-core/src/protocol/context/prss.rs
@@ -1,7 +1,7 @@
 //! Metric-aware PRSS decorators
 
 use generic_array::{ArrayLength, GenericArray};
-use rand_core::{Error, RngCore};
+use rand_core::{CryptoRng, Error, RngCore};
 
 use crate::{
     helpers::{Direction, Role},
@@ -145,3 +145,5 @@ impl RngCore for InstrumentedSequentialSharedRandomness<'_> {
         self.inner.try_fill_bytes(dest)
     }
 }
+
+impl CryptoRng for InstrumentedSequentialSharedRandomness<'_> {}
diff --git a/ipa-core/src/protocol/ipa_prf/aggregation/breakdown_reveal.rs b/ipa-core/src/protocol/ipa_prf/aggregation/breakdown_reveal.rs
@@ -19,8 +19,11 @@ use crate::{
         basics::semi_honest_reveal,
         context::Context,
         ipa_prf::{
-            aggregation::step::AggregationStep, prf_sharding::SecretSharedAttributionOutputs,
-            shuffle::shuffle_attribution_outputs, BreakdownKey,
+            aggregation::step::AggregationStep,
+            oprf_padding::{apply_dp_padding, PaddingParameters},
+            prf_sharding::{AttributionOutputs, SecretSharedAttributionOutputs},
+            shuffle::shuffle_attribution_outputs,
+            BreakdownKey,
         },
         BooleanProtocols, RecordId,
     },
@@ -59,7 +62,17 @@ where
     BitDecomposed<Replicated<Boolean, B>>:
         for<'a> TransposeFrom<&'a [Replicated<TV>; B], Error = Infallible>,
 {
-    let atributions = shuffle_attributions(&ctx, attributed_values).await?;
+    let dp_padding_params = PaddingParameters::relaxed();
+    // Apply DP padding for Breakdown Reveal Aggregation
+    let attributed_values_padded =
+        apply_dp_padding::<_, AttributionOutputs<Replicated<BK>, Replicated<TV>>, B>(
+            ctx.narrow(&AggregationStep::PaddingDp),
+            attributed_values,
+            dp_padding_params,
+        )
+        .await?;
+
+    let atributions = shuffle_attributions(&ctx, attributed_values_padded).await?;
     let grouped_tvs = reveal_breakdowns(&ctx, atributions).await?;
     let num_rows = grouped_tvs.max_len;
     aggregate_values::<_, HV, B>(ctx, grouped_tvs.into_stream(), num_rows).await

diff --git a/ipa-core/src/protocol/ipa_prf/aggregation/step.rs b/ipa-core/src/protocol/ipa_prf/aggregation/step.rs
@@ -5,6 +5,8 @@ pub(crate) enum AggregationStep {
     /// key. Aggregation based on move to bucket approach does not need them.
     /// When reveal-based aggregation is the default, other steps (such as `MoveToBucket`)
     /// should be deleted
+    #[step(child = crate::protocol::ipa_prf::oprf_padding::step::PaddingDpStep, name="padding_dp")]
+    PaddingDp,
     #[step(child = crate::protocol::ipa_prf::shuffle::step::OPRFShuffleStep)]
     Shuffle,
     RevealStep,

diff --git a/ipa-core/src/protocol/ipa_prf/mod.rs b/ipa-core/src/protocol/ipa_prf/mod.rs
@@ -25,6 +25,7 @@ use crate::{
         },
         ipa_prf::{
             boolean_ops::convert_to_fp25519,
+            oprf_padding::apply_dp_padding,
             prf_eval::{eval_dy_prf, gen_prf_key},
             prf_sharding::{
                 attribute_cap_aggregate, histograms_ranges_sortkeys, PrfShardedIpaInputRow,
@@ -91,7 +92,9 @@ use step::IpaPrfStep as Step;
 
 use crate::{
     helpers::query::DpMechanism,
-    protocol::{context::Validator, dp::dp_for_histogram},
+    protocol::{
+        context::Validator, dp::dp_for_histogram, ipa_prf::oprf_padding::PaddingParameters,
+    },
 };
 
 #[derive(Clone, Debug, Default)]
@@ -218,6 +221,7 @@ pub async fn oprf_ipa<'ctx, BK, TV, HV, TS, const SS_BITS: usize, const B: usize
     input_rows: Vec<OPRFIPAInputRow<BK, TV, TS>>,
     attribution_window_seconds: Option<NonZeroU32>,
     dp_params: DpMechanism,
+    dp_padding_params: PaddingParameters,
 ) -> Result<Vec<Replicated<HV>>, Error>
 where
     BK: BreakdownKey<B>,
@@ -247,7 +251,16 @@ where
     if input_rows.is_empty() {
         return Ok(vec![Replicated::ZERO; B]);
     }
-    let shuffled = shuffle_inputs(ctx.narrow(&Step::Shuffle), input_rows).await?;
+
+    // Apply DP padding for OPRF
+    let padded_input_rows = apply_dp_padding::<_, OPRFIPAInputRow<BK, TV, TS>, B>(
+        ctx.narrow(&Step::PaddingDp),
+        input_rows,
+        dp_padding_params,
+    )
+    .await?;
+
+    let shuffled = shuffle_inputs(ctx.narrow(&Step::Shuffle), padded_input_rows).await?;
     let mut prfd_inputs = compute_prf_for_inputs(ctx.clone(), &shuffled).await?;
 
     prfd_inputs.sort_by(|a, b| a.prf_of_match_key.cmp(&b.prf_of_match_key));
@@ -376,7 +389,10 @@ pub mod tests {
             U128Conversions,
         },
         helpers::query::DpMechanism,
-        protocol::{dp::NoiseParams, ipa_prf::oprf_ipa},
+        protocol::{
+            dp::NoiseParams,
+            ipa_prf::{oprf_ipa, oprf_padding::PaddingParameters},
+        },
         test_executor::run,
         test_fixture::{ipa::TestRawDataRecord, Reconstruct, Runner, TestWorld},
     };
@@ -410,14 +426,22 @@ pub mod tests {
                 test_input(10, 12345, true, 0, 5),
                 test_input(0, 68362, false, 1, 0),
                 test_input(20, 68362, true, 0, 2),
-            ];
+            ]; // trigger value of 2 attributes to earlier source row with breakdown 1 and trigger
+               // value of 5 attributes to source row with breakdown 2.
             let dp_params = DpMechanism::NoDp;
+            let padding_params = PaddingParameters::relaxed();
 
             let mut result: Vec<_> = world
                 .semi_honest(records.into_iter(), |ctx, input_rows| async move {
-                    oprf_ipa::<BA5, BA3, BA16, BA20, 5, 32>(ctx, input_rows, None, dp_params)
-                        .await
-                        .unwrap()
+                    oprf_ipa::<BA5, BA3, BA16, BA20, 5, 32>(
+                        ctx,
+                        input_rows,
+                        None,
+                        dp_params,
+                        padding_params,
+                    )
+                    .await
+                    .unwrap()
                 })
                 .await
                 .reconstruct();
@@ -432,6 +456,8 @@ pub mod tests {
     #[test]
     fn semi_honest_with_dp() {
         const SS_BITS: usize = 1;
+        // setting SS_BITS this small will cause clipping in capping
+        // since per_user_credit_cap == 2^SS_BITS
         semi_honest_with_dp_internal::<SS_BITS>();
     }
     #[test]
@@ -451,6 +477,7 @@ pub mod tests {
             let epsilon = 10.0;
             let dp_params = DpMechanism::Binomial { epsilon };
             let per_user_credit_cap = 2_f64.powi(i32::try_from(SS_BITS).unwrap());
+            let padding_params = PaddingParameters::relaxed();
             let world = TestWorld::default();
 
             let records: Vec<TestRawDataRecord> = vec![
@@ -462,9 +489,15 @@ pub mod tests {
             ];
             let mut result: Vec<_> = world
                 .semi_honest(records.into_iter(), |ctx, input_rows| async move {
-                    oprf_ipa::<BA5, BA3, BA16, BA20, SS_BITS, B>(ctx, input_rows, None, dp_params)
-                        .await
-                        .unwrap()
+                    oprf_ipa::<BA5, BA3, BA16, BA20, SS_BITS, B>(
+                        ctx,
+                        input_rows,
+                        None,
+                        dp_params,
+                        padding_params,
+                    )
+                    .await
+                    .unwrap()
                 })
                 .await
                 .reconstruct();
@@ -513,12 +546,19 @@ pub mod tests {
 
             let records: Vec<TestRawDataRecord> = vec![];
             let dp_params = DpMechanism::NoDp;
+            let padding_params = PaddingParameters::no_padding();
 
             let mut result: Vec<_> = world
                 .semi_honest(records.into_iter(), |ctx, input_rows| async move {
-                    oprf_ipa::<BA5, BA3, BA8, BA20, 5, 32>(ctx, input_rows, None, dp_params)
-                        .await
-                        .unwrap()
+                    oprf_ipa::<BA5, BA3, BA8, BA20, 5, 32>(
+                        ctx,
+                        input_rows,
+                        None,
+                        dp_params,
+                        padding_params,
+                    )
+                    .await
+                    .unwrap()
                 })
                 .await
                 .reconstruct();
@@ -542,12 +582,19 @@ pub mod tests {
                 test_input(0, 68362, false, 1, 0),
             ];
             let dp_params = DpMechanism::NoDp;
+            let padding_params = PaddingParameters::no_padding();
 
             let mut result: Vec<_> = world
                 .semi_honest(records.into_iter(), |ctx, input_rows| async move {
-                    oprf_ipa::<BA5, BA3, BA8, BA20, 5, 32>(ctx, input_rows, None, dp_params)
-                        .await
-                        .unwrap()
+                    oprf_ipa::<BA5, BA3, BA8, BA20, 5, 32>(
+                        ctx,
+                        input_rows,
+                        None,
+                        dp_params,
+                        padding_params,
+                    )
+                    .await
+                    .unwrap()
                 })
                 .await
                 .reconstruct();
@@ -590,11 +637,18 @@ pub mod tests {
 
             records.shuffle(&mut thread_rng());
             let dp_params = DpMechanism::NoDp;
+            let padding_params = PaddingParameters::no_padding();
             let mut result: Vec<_> = world
                 .semi_honest(records.into_iter(), |ctx, input_rows| async move {
-                    oprf_ipa::<BA8, BA3, BA16, BA20, 5, 256>(ctx, input_rows, None, dp_params)
-                        .await
-                        .unwrap()
+                    oprf_ipa::<BA8, BA3, BA16, BA20, 5, 256>(
+                        ctx,
+                        input_rows,
+                        None,
+                        dp_params,
+                        padding_params,
+                    )
+                    .await
+                    .unwrap()
                 })
                 .await
                 .reconstruct();

diff --git a/ipa-core/src/protocol/ipa_prf/oprf_padding/README.md b/ipa-core/src/protocol/ipa_prf/oprf_padding/README.md
@@ -44,7 +44,9 @@ The process of drawing a sample from a Truncated Double Geometric will be done b
 4. We will use rejection sampleing from a double geometric to sample from a truncated double geometric.
 
 ### Sampling from the Geometric Distribuiton
-We take the Geometric Distribution to be the probability distribution of the number of failures of Bernoulli trials before the first success, supported on the set $\{0,1,2,...\}$, with $0 < p \leq 1$ the success probability of the Bernoulli trials.  <!-- The mean of the geometric is $\mu = \frac{1-p}{p}$ and variance is $\sigma^2 = \frac{1-p}{p^2}$. -->
+We take the Geometric Distribution to be the probability distribution of the number of failures of Bernoulli trials before the first success, supported on the set $\{0,1,2,...\}$, with $0 < p \leq 1$ the success probability of the Bernoulli trials.
+
+The mean of the geometric is $\mu = \frac{1-p}{p}$ and variance is $\sigma^2 = \frac{1-p}{p^2}$.
 
 ### Sampling from the Double Geometric Distribution
 We use the following from this [book](https://www.researchgate.net/publication/258697410_The_Laplace_Distribution_and_Generalizations) page 159.
@@ -56,7 +58,27 @@ $Y=\theta + X_1 - X_2$
 where $X_1$ and $X_2$ are iid geometric variables with success probability $p = 1 - e^{-1/s}$.  We use this relation to sample from the double geometric by first drawing two independent samples from $X_1$ and $X_2$ and then computing their difference plus the shift by $\theta$.
 
 
-<!-- The variance of a double geometric is the sum of the variances of the two independent geometrics, $X_1$ and $X_2$, so is $2 (\frac{1-p}{p^2})$ -->
+The variance of a double geometric is the sum of the variances of the two independent geometrics, $X_1$ and $X_2$, and is therefore $2 \cdot \frac{1-p}{p^2}$.
 
 ### Samples from the Truncated Double Geometric Distribution
 Once we can draw samples from a double geometric, we can sample from our desired truncated double geometric by sampling the double geometric with rejection if the sample lies outside the support set $\{0,...,2n\}$.
+
+The variance of a truncated double geometric distribution is (TODO), but the variance is always less than the variance of the underlying (non-truncated) double geometric distribution.
+
+# Padding Breakdown Keys for Reveal Based Aggregation
+A new aggregation protocol reveals the breakdown keys in the clear before aggregating the associated secret
+shared values.   This leaks the number of records for each breakdown key.  We can assume that there is a cap
+enforced on the number of records for any one matchkey in IPA. Using this sensitivity we can then (with a desired epsilon,
+delta) generate a random number of dummy padding rows for each breakdown key.
+
+# Generating Padding for Matchkeys and Breakdown keys together
+We need to add fake rows for matchkeys and fake rows for breakdown keys.  It makes sense to try and add the fake breakdown
+keys to the fake rows already being generated for fake matchkeys. But this approach has a couple challenges:
+1. We shouldn't add any fake breakdown keys to fake matchkey rows when the matchkey is being added with cardinality equal to one,
+because these rows can be dropped after matching and never have the fake breakdowns revealed.
+2. There may need to be some adjustment made to the DP parameters achieved. TODO
+3. We should not be adding fake breakdown keys to matchkeys that have a cardinality larger than the cap we have established for
+the number of breakdowns per user. Otherwise, those breakdown keys would never be revealed as they will be dropped.
+
+Instead of this approach we will generate the fake rows for matchkey padding first and then the fake rows for breakdown key padding. When
+we generate the fake rows for breakdown key padding, the fake matchkeys generated will all have cardinality two or three (and with small probability one).
diff --git a/ipa-core/src/protocol/ipa_prf/oprf_padding/distributions.rs b/ipa-core/src/protocol/ipa_prf/oprf_padding/distributions.rs
@@ -139,7 +139,7 @@ impl Distribution<i32> for DoubleGeometric {
 /// Truncated Double Geometric distribution.
 #[derive(Debug, PartialEq)]
 pub struct TruncatedDoubleGeometric {
-    shift_doubled: u32, // move 2 * shift to constructor instead of sample
+    pub shift_doubled: u32, // move 2 * shift to constructor instead of sample
     double_geometric: DoubleGeometric,
 }