DP Padding (#1225)

* all work from dp_padding branch as single commit * add logging for total fake records * table of parameters * relaxed parameters in oprf_ipa * PR feedback except separating aggregation padding * WIP Paddable trait to handle both OPRF and Agg * update trait bounds * more in Paddable trait * building passes * tests pass * increase test failure rate for Laplace tail bounds * address feedback * simplify logic for refering to roles * update comments * use default parameters instead of relaxed * address feedback, relaxed for OPRF * Remove commented code --------- Co-authored-by: Benjamin Case <bmcase@meta.com>
private-attribution · Aug 27, 2024 · 4a881f6 · 4a881f6
1 parent 108119f
commit 4a881f6
Show file tree

Hide file tree

Showing 18 changed files with 1,002 additions and 47 deletions.
diff --git a/ipa-core/build.rs b/ipa-core/build.rs
@@ -21,6 +21,7 @@ track_steps!(
  prf_sharding::step,
  shuffle::step,
  aggregation::step,
+ oprf_padding::step,
  step,
  },
  dp::step,

diff --git a/ipa-core/src/error.rs b/ipa-core/src/error.rs
@@ -82,10 +82,14 @@ pub enum Error {
  ParallelDZKPValidationFailed,
  #[error("Inconsistent shares")]
  InconsistentShares,
+ #[error("Inconsistent padding")]
+ InconsistentPadding,
  #[error("The Masks cannot be set safely, i.e. without deleting non-zero field elements")]
  DZKPMasks,
  #[error("Attempt to operate on zero records")]
  ZeroRecords,
+ #[error("DP related error: {0}")]
+ DPPaddingError(#[from] crate::protocol::ipa_prf::oprf_padding::insecure::DpError),
  #[error("Epsilon submitted to query is out of bounds")]
  EpsilonOutOfBounds,
  #[error("Missing total records in {0}")]

diff --git a/ipa-core/src/helpers/mod.rs b/ipa-core/src/helpers/mod.rs
@@ -303,6 +303,18 @@ impl Role {
  pub const fn eq(self, other: Self) -> bool {
  matches!((self, other), (H1, H1) | (H2, H2) | (H3, H3))
  }
+
+ /// Returns the direction to the peer with the specified role.
+ ///
+ /// If `self == role`, returns `None`.
+ #[must_use]
+ pub const fn direction_to(&self, role: Role) -> Option<Direction> {
+ match (self, role) {
+ (H1, H2) | (H2, H3) | (H3, H1) => Some(Direction::Right),
+ (H1, H3) | (H2, H1) | (H3, H2) => Some(Direction::Left),
+ (H1, H1) | (H2, H2) | (H3, H3) => None,
+ }
+ }
 }
 
 impl From<Role> for &'static str {

diff --git a/ipa-core/src/protocol/context/prss.rs b/ipa-core/src/protocol/context/prss.rs
@@ -1,7 +1,7 @@
 //! Metric-aware PRSS decorators
 
 use generic_array::{ArrayLength, GenericArray};
-use rand_core::{Error, RngCore};
+use rand_core::{CryptoRng, Error, RngCore};
 
 use crate::{
  helpers::{Direction, Role},
@@ -145,3 +145,5 @@ impl RngCore for InstrumentedSequentialSharedRandomness<'_> {
  self.inner.try_fill_bytes(dest)
  }
 }
+
+impl CryptoRng for InstrumentedSequentialSharedRandomness<'_> {}
diff --git a/ipa-core/src/protocol/ipa_prf/aggregation/breakdown_reveal.rs b/ipa-core/src/protocol/ipa_prf/aggregation/breakdown_reveal.rs
@@ -19,8 +19,11 @@ use crate::{
  basics::semi_honest_reveal,
  context::Context,
  ipa_prf::{
- aggregation::step::AggregationStep, prf_sharding::SecretSharedAttributionOutputs,
- shuffle::shuffle_attribution_outputs, BreakdownKey,
+ aggregation::step::AggregationStep,
+ oprf_padding::{apply_dp_padding, PaddingParameters},
+ prf_sharding::{AttributionOutputs, SecretSharedAttributionOutputs},
+ shuffle::shuffle_attribution_outputs,
+ BreakdownKey,
  },
  BooleanProtocols, RecordId,
  },
@@ -59,8 +62,18 @@ where
  BitDecomposed<Replicated<Boolean, B>>:
  for<'a> TransposeFrom<&'a [Replicated<TV>; B], Error = Infallible>,
 {
- let atributions = shuffle_attributions(&ctx, attributed_values).await?;
- let grouped_tvs = reveal_breakdowns(&ctx, atributions).await?;
+ let dp_padding_params = PaddingParameters::default();
+ // Apply DP padding for Breakdown Reveal Aggregation
+ let attributed_values_padded =
+ apply_dp_padding::<_, AttributionOutputs<Replicated<BK>, Replicated<TV>>, B>(
+ ctx.narrow(&AggregationStep::PaddingDp),
+ attributed_values,
+ dp_padding_params,
+ )
+ .await?;
+
+ let attributions = shuffle_attributions(&ctx, attributed_values_padded).await?;
+ let grouped_tvs = reveal_breakdowns(&ctx, attributions).await?;
  let num_rows = grouped_tvs.max_len;
  aggregate_values::<_, HV, B>(ctx, grouped_tvs.into_stream(), num_rows).await
 }

diff --git a/ipa-core/src/protocol/ipa_prf/aggregation/step.rs b/ipa-core/src/protocol/ipa_prf/aggregation/step.rs
@@ -5,6 +5,8 @@ pub(crate) enum AggregationStep {
  /// key. Aggregation based on move to bucket approach does not need them.
  /// When reveal-based aggregation is the default, other steps (such as `MoveToBucket`)
  /// should be deleted
+ #[step(child = crate::protocol::ipa_prf::oprf_padding::step::PaddingDpStep, name="padding_dp")]
+ PaddingDp,
  #[step(child = crate::protocol::ipa_prf::shuffle::step::OPRFShuffleStep)]
  Shuffle,
  RevealStep,

diff --git a/ipa-core/src/protocol/ipa_prf/mod.rs b/ipa-core/src/protocol/ipa_prf/mod.rs
@@ -25,6 +25,7 @@ use crate::{
  },
  ipa_prf::{
  boolean_ops::convert_to_fp25519,
+ oprf_padding::apply_dp_padding,
  prf_eval::{eval_dy_prf, gen_prf_key},
  prf_sharding::{
  attribute_cap_aggregate, histograms_ranges_sortkeys, PrfShardedIpaInputRow,
@@ -91,7 +92,9 @@ use step::IpaPrfStep as Step;
 
 use crate::{
  helpers::query::DpMechanism,
- protocol::{context::Validator, dp::dp_for_histogram},
+ protocol::{
+ context::Validator, dp::dp_for_histogram, ipa_prf::oprf_padding::PaddingParameters,
+ },
 };
 
 #[derive(Clone, Debug, Default)]
@@ -218,6 +221,7 @@ pub async fn oprf_ipa<'ctx, BK, TV, HV, TS, const SS_BITS: usize, const B: usize
  input_rows: Vec<OPRFIPAInputRow<BK, TV, TS>>,
  attribution_window_seconds: Option<NonZeroU32>,
  dp_params: DpMechanism,
+ dp_padding_params: PaddingParameters,
 ) -> Result<Vec<Replicated<HV>>, Error>
 where
  BK: BreakdownKey<B>,
@@ -247,7 +251,16 @@ where
  if input_rows.is_empty() {
  return Ok(vec![Replicated::ZERO; B]);
  }
- let shuffled = shuffle_inputs(ctx.narrow(&Step::Shuffle), input_rows).await?;
+
+ // Apply DP padding for OPRF
+ let padded_input_rows = apply_dp_padding::<_, OPRFIPAInputRow<BK, TV, TS>, B>(
+ ctx.narrow(&Step::PaddingDp),
+ input_rows,
+ dp_padding_params,
+ )
+ .await?;
+
+ let shuffled = shuffle_inputs(ctx.narrow(&Step::Shuffle), padded_input_rows).await?;
  let mut prfd_inputs = compute_prf_for_inputs(ctx.clone(), &shuffled).await?;
 
  prfd_inputs.sort_by(|a, b| a.prf_of_match_key.cmp(&b.prf_of_match_key));
@@ -376,7 +389,10 @@ pub mod tests {
  U128Conversions,
  },
  helpers::query::DpMechanism,
- protocol::{dp::NoiseParams, ipa_prf::oprf_ipa},
+ protocol::{
+ dp::NoiseParams,
+ ipa_prf::{oprf_ipa, oprf_padding::PaddingParameters},
+ },
  test_executor::run,
  test_fixture::{ipa::TestRawDataRecord, Reconstruct, Runner, TestWorld},
  };
@@ -410,14 +426,22 @@ pub mod tests {
  test_input(10, 12345, true, 0, 5),
  test_input(0, 68362, false, 1, 0),
  test_input(20, 68362, true, 0, 2),
- ];
+ ]; // trigger value of 2 attributes to earlier source row with breakdown 1 and trigger
+ // value of 5 attributes to source row with breakdown 2.
  let dp_params = DpMechanism::NoDp;
+ let padding_params = PaddingParameters::relaxed();
 
  let mut result: Vec<_> = world
  .semi_honest(records.into_iter(), |ctx, input_rows| async move {
- oprf_ipa::<BA5, BA3, BA16, BA20, 5, 32>(ctx, input_rows, None, dp_params)
- .await
- .unwrap()
+ oprf_ipa::<BA5, BA3, BA16, BA20, 5, 32>(
+ ctx,
+ input_rows,
+ None,
+ dp_params,
+ padding_params,
+ )
+ .await
+ .unwrap()
  })
  .await
  .reconstruct();
@@ -432,6 +456,8 @@ pub mod tests {
  #[test]
  fn semi_honest_with_dp() {
  const SS_BITS: usize = 1;
+ // setting SS_BITS this small will cause clipping in capping
+ // since per_user_credit_cap == 2^SS_BITS
  semi_honest_with_dp_internal::<SS_BITS>();
  }
  #[test]
@@ -451,6 +477,7 @@ pub mod tests {
  let epsilon = 10.0;
  let dp_params = DpMechanism::Binomial { epsilon };
  let per_user_credit_cap = 2_f64.powi(i32::try_from(SS_BITS).unwrap());
+ let padding_params = PaddingParameters::relaxed();
  let world = TestWorld::default();
 
  let records: Vec<TestRawDataRecord> = vec![
@@ -462,9 +489,15 @@ pub mod tests {
  ];
  let mut result: Vec<_> = world
  .semi_honest(records.into_iter(), |ctx, input_rows| async move {
- oprf_ipa::<BA5, BA3, BA16, BA20, SS_BITS, B>(ctx, input_rows, None, dp_params)
- .await
- .unwrap()
+ oprf_ipa::<BA5, BA3, BA16, BA20, SS_BITS, B>(
+ ctx,
+ input_rows,
+ None,
+ dp_params,
+ padding_params,
+ )
+ .await
+ .unwrap()
  })
  .await
  .reconstruct();
@@ -513,12 +546,19 @@ pub mod tests {
 
  let records: Vec<TestRawDataRecord> = vec![];
  let dp_params = DpMechanism::NoDp;
+ let padding_params = PaddingParameters::no_padding();
 
  let mut result: Vec<_> = world
  .semi_honest(records.into_iter(), |ctx, input_rows| async move {
- oprf_ipa::<BA5, BA3, BA8, BA20, 5, 32>(ctx, input_rows, None, dp_params)
- .await
- .unwrap()
+ oprf_ipa::<BA5, BA3, BA8, BA20, 5, 32>(
+ ctx,
+ input_rows,
+ None,
+ dp_params,
+ padding_params,
+ )
+ .await
+ .unwrap()
  })
  .await
  .reconstruct();
@@ -542,12 +582,19 @@ pub mod tests {
  test_input(0, 68362, false, 1, 0),
  ];
  let dp_params = DpMechanism::NoDp;
+ let padding_params = PaddingParameters::no_padding();
 
  let mut result: Vec<_> = world
  .semi_honest(records.into_iter(), |ctx, input_rows| async move {
- oprf_ipa::<BA5, BA3, BA8, BA20, 5, 32>(ctx, input_rows, None, dp_params)
- .await
- .unwrap()
+ oprf_ipa::<BA5, BA3, BA8, BA20, 5, 32>(
+ ctx,
+ input_rows,
+ None,
+ dp_params,
+ padding_params,
+ )
+ .await
+ .unwrap()
  })
  .await
  .reconstruct();
@@ -590,11 +637,18 @@ pub mod tests {
 
  records.shuffle(&mut thread_rng());
  let dp_params = DpMechanism::NoDp;
+ let padding_params = PaddingParameters::no_padding();
  let mut result: Vec<_> = world
  .semi_honest(records.into_iter(), |ctx, input_rows| async move {
- oprf_ipa::<BA8, BA3, BA16, BA20, 5, 256>(ctx, input_rows, None, dp_params)
- .await
- .unwrap()
+ oprf_ipa::<BA8, BA3, BA16, BA20, 5, 256>(
+ ctx,
+ input_rows,
+ None,
+ dp_params,
+ padding_params,
+ )
+ .await
+ .unwrap()
  })
  .await
  .reconstruct();

diff --git a/ipa-core/src/protocol/ipa_prf/oprf_padding/README.md b/ipa-core/src/protocol/ipa_prf/oprf_padding/README.md
@@ -44,7 +44,9 @@ The process of drawing a sample from a Truncated Double Geometric will be done b
 4. We will use rejection sampling from a double geometric to sample from a truncated double geometric.
 
 ### Sampling from the Geometric Distribution
-We take the Geometric Distribution to be the probability distribution of the number of failures of Bernoulli trials before the first success, supported on the set $\{0,1,2,...\}$, with $0 < p \leq 1$ the success probability of the Bernoulli trials. <!-- The mean of the geometric is $\mu = \frac{1-p}{p}$ and variance is $\sigma^2 = \frac{1-p}{p^2}$. -->
+We take the Geometric Distribution to be the probability distribution of the number of failures of Bernoulli trials before the first success, supported on the set $\{0,1,2,...\}$, with $0 < p \leq 1$ the success probability of the Bernoulli trials.
+
+The mean of the geometric is $\mu = \frac{1-p}{p}$ and variance is $\sigma^2 = \frac{1-p}{p^2}$.
 
 ### Sampling from the Double Geometric Distribution
 We use the following from this [book](https://www.researchgate.net/publication/258697410_The_Laplace_Distribution_and_Generalizations) page 159.
@@ -56,7 +58,27 @@ $Y=\theta + X_1 - X_2$
 where $X_1$ and $X_2$ are iid geometric variables with success probability $p = 1 - e^{-1/s}$. We use this relation to sample from the double geometric by first drawing two independent samples from $X_1$ and $X_2$ and then computing their difference plus the shift by $\theta$.
 
 
-<!-- The variance of a double geometric is the sum of the variances of the two independent geometrics, $X_1$ and $X_2$, so is $2 (\frac{1-p}{p^2})$ -->
+The variance of a double geometric is the sum of the variances of the two independent geometrics, $X_1$ and $X_2$, so it is $2 \cdot \frac{1-p}{p^2}$.
 
 ### Samples from the Truncated Double Geometric Distribution
 Once we can draw samples from a double geometric, we can sample from our desired truncated double geometric by sampling the double geometric with rejection if the sample lies outside the support set $\{0,...,2n\}$.
+
+The variance of a truncated double geometric distribution is (TODO), but the variance is always less than the variance of the underlying (non-truncated) double geometric distribution.
+
+# Padding Breakdown Keys for Reveal-Based Aggregation
+A new aggregation protocol reveals the breakdown keys in the clear before aggregating the associated secret
+shared values. This leaks the number of records for each breakdown key. We can assume that there is a cap
+enforced on the number of records for any one matchkey in IPA. Using this sensitivity we can then (with a desired epsilon,
+delta) generate a random padding number of dummy rows with each breakdown key.
+
+# Generating Padding for Matchkeys and Breakdown keys together
+1. The first approach would be to add the fake breakdown keys to the fake rows already being generated for fake matchkeys. But this 
+approach has a couple of challenges:
+ 1. We shouldn't add any fake breakdown keys to fake matchkey rows when the matchkey is being added with cardinality 
+ equal to one, because these rows can be dropped after matching and never have the fake breakdowns revealed.
+ 2. There may need to be some adjustment made to the DP parameters achieved.
+ 3. We should not be adding fake breakdown keys to matchkeys that have a cardinality larger than the cap we have established 
+ for the number of breakdowns per user. Otherwise, those breakdown keys would never be revealed as they will be dropped.
+2. The second approach we could consider is to add the fake rows for matchkey padding at the start of the protocol and then later 
+right before Breakdown Reveal Aggregation add the fake rows for breakdown key padding. This approach has the benefit of being more
+efficient in that we do not need to compute the OPRF of these fake rows which are added just-in-time for use in aggregation.
diff --git a/ipa-core/src/protocol/ipa_prf/oprf_padding/distributions.rs b/ipa-core/src/protocol/ipa_prf/oprf_padding/distributions.rs
@@ -139,7 +139,7 @@ impl Distribution<i32> for DoubleGeometric {
 /// Truncated Double Geometric distribution.
 #[derive(Debug, PartialEq)]
 pub struct TruncatedDoubleGeometric {
- shift_doubled: u32, // move 2 * shift to constructor instead of sample
+ pub shift_doubled: u32, // move 2 * shift to constructor instead of sample
  double_geometric: DoubleGeometric,
 }