Skip to content

Commit

Permalink
DP Padding (#1225)
Browse files Browse the repository at this point in the history
* all work from dp_padding branch as single commit

* add logging for total fake records

* table of parameters

* relaxed parameters in oprf_ipa

* PR feedback except separating aggregation padding

* WIP Paddable trait to handle both OPRF and Agg

* update trait bounds

* more in Paddable trait

* building passes

* tests pass

* increase test failure rate for Laplace tail bounds

* address feedback

* simplify logic for refering to roles

* update comments

* use default parameters instead of relaxed

* address feedback, relaxed for OPRF

* Remove commented code

---------

Co-authored-by: Benjamin Case <bmcase@meta.com>
  • Loading branch information
bmcase and Benjamin Case committed Aug 27, 2024
1 parent 108119f commit 4a881f6
Show file tree
Hide file tree
Showing 18 changed files with 1,002 additions and 47 deletions.
1 change: 1 addition & 0 deletions ipa-core/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ track_steps!(
prf_sharding::step,
shuffle::step,
aggregation::step,
oprf_padding::step,
step,
},
dp::step,
Expand Down
4 changes: 4 additions & 0 deletions ipa-core/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,10 +82,14 @@ pub enum Error {
ParallelDZKPValidationFailed,
#[error("Inconsistent shares")]
InconsistentShares,
#[error("Inconsistent padding")]
InconsistentPadding,
#[error("The Masks cannot be set safely, i.e. without deleting non-zero field elements")]
DZKPMasks,
#[error("Attempt to operate on zero records")]
ZeroRecords,
#[error("DP related error: {0}")]
DPPaddingError(#[from] crate::protocol::ipa_prf::oprf_padding::insecure::DpError),
#[error("Epsilon submitted to query is out of bounds")]
EpsilonOutOfBounds,
#[error("Missing total records in {0}")]
Expand Down
12 changes: 12 additions & 0 deletions ipa-core/src/helpers/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,18 @@ impl Role {
pub const fn eq(self, other: Self) -> bool {
// Usable in const contexts: true only when both values are the same role
// variant (H1/H1, H2/H2 or H3/H3); every mixed pair yields false.
matches!((self, other), (H1, H1) | (H2, H2) | (H3, H3))
}

/// Gives the [`Direction`] that leads from this role to the peer `role`.
///
/// * H1→H2, H2→H3 and H3→H1 yield `Some(Direction::Right)`.
/// * H1→H3, H2→H1 and H3→H2 yield `Some(Direction::Left)`.
/// * When `role` is the same as `self` there is no peer to point at and
///   the result is `None`.
#[must_use]
pub const fn direction_to(&self, role: Role) -> Option<Direction> {
    match (self, role) {
        // Same role on both sides: no direction exists.
        (H1, H1) | (H2, H2) | (H3, H3) => None,
        (H1, H3) | (H2, H1) | (H3, H2) => Some(Direction::Left),
        (H1, H2) | (H2, H3) | (H3, H1) => Some(Direction::Right),
    }
}
}

impl From<Role> for &'static str {
Expand Down
4 changes: 3 additions & 1 deletion ipa-core/src/protocol/context/prss.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
//! Metric-aware PRSS decorators

use generic_array::{ArrayLength, GenericArray};
use rand_core::{Error, RngCore};
use rand_core::{CryptoRng, Error, RngCore};

use crate::{
helpers::{Direction, Role},
Expand Down Expand Up @@ -145,3 +145,5 @@ impl RngCore for InstrumentedSequentialSharedRandomness<'_> {
self.inner.try_fill_bytes(dest)
}
}

// Marker impl: `CryptoRng` adds no methods, it only lets this instrumented
// wrapper be passed to APIs that require a `CryptoRng + RngCore` bound.
// NOTE(review): this asserts the wrapped generator is suitable for
// cryptographic use — confirm that holds for the inner PRSS generator.
impl CryptoRng for InstrumentedSequentialSharedRandomness<'_> {}
21 changes: 17 additions & 4 deletions ipa-core/src/protocol/ipa_prf/aggregation/breakdown_reveal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,11 @@ use crate::{
basics::semi_honest_reveal,
context::Context,
ipa_prf::{
aggregation::step::AggregationStep, prf_sharding::SecretSharedAttributionOutputs,
shuffle::shuffle_attribution_outputs, BreakdownKey,
aggregation::step::AggregationStep,
oprf_padding::{apply_dp_padding, PaddingParameters},
prf_sharding::{AttributionOutputs, SecretSharedAttributionOutputs},
shuffle::shuffle_attribution_outputs,
BreakdownKey,
},
BooleanProtocols, RecordId,
},
Expand Down Expand Up @@ -59,8 +62,18 @@ where
BitDecomposed<Replicated<Boolean, B>>:
for<'a> TransposeFrom<&'a [Replicated<TV>; B], Error = Infallible>,
{
let atributions = shuffle_attributions(&ctx, attributed_values).await?;
let grouped_tvs = reveal_breakdowns(&ctx, atributions).await?;
let dp_padding_params = PaddingParameters::default();
// Apply DP padding for Breakdown Reveal Aggregation
let attributed_values_padded =
apply_dp_padding::<_, AttributionOutputs<Replicated<BK>, Replicated<TV>>, B>(
ctx.narrow(&AggregationStep::PaddingDp),
attributed_values,
dp_padding_params,
)
.await?;

let attributions = shuffle_attributions(&ctx, attributed_values_padded).await?;
let grouped_tvs = reveal_breakdowns(&ctx, attributions).await?;
let num_rows = grouped_tvs.max_len;
aggregate_values::<_, HV, B>(ctx, grouped_tvs.into_stream(), num_rows).await
}
Expand Down
2 changes: 2 additions & 0 deletions ipa-core/src/protocol/ipa_prf/aggregation/step.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ pub(crate) enum AggregationStep {
/// key. Aggregation based on move to bucket approach does not need them.
/// When reveal-based aggregation is the default, other steps (such as `MoveToBucket`)
/// should be deleted
#[step(child = crate::protocol::ipa_prf::oprf_padding::step::PaddingDpStep, name="padding_dp")]
PaddingDp,
#[step(child = crate::protocol::ipa_prf::shuffle::step::OPRFShuffleStep)]
Shuffle,
RevealStep,
Expand Down
92 changes: 73 additions & 19 deletions ipa-core/src/protocol/ipa_prf/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ use crate::{
},
ipa_prf::{
boolean_ops::convert_to_fp25519,
oprf_padding::apply_dp_padding,
prf_eval::{eval_dy_prf, gen_prf_key},
prf_sharding::{
attribute_cap_aggregate, histograms_ranges_sortkeys, PrfShardedIpaInputRow,
Expand Down Expand Up @@ -91,7 +92,9 @@ use step::IpaPrfStep as Step;

use crate::{
helpers::query::DpMechanism,
protocol::{context::Validator, dp::dp_for_histogram},
protocol::{
context::Validator, dp::dp_for_histogram, ipa_prf::oprf_padding::PaddingParameters,
},
};

#[derive(Clone, Debug, Default)]
Expand Down Expand Up @@ -218,6 +221,7 @@ pub async fn oprf_ipa<'ctx, BK, TV, HV, TS, const SS_BITS: usize, const B: usize
input_rows: Vec<OPRFIPAInputRow<BK, TV, TS>>,
attribution_window_seconds: Option<NonZeroU32>,
dp_params: DpMechanism,
dp_padding_params: PaddingParameters,
) -> Result<Vec<Replicated<HV>>, Error>
where
BK: BreakdownKey<B>,
Expand Down Expand Up @@ -247,7 +251,16 @@ where
if input_rows.is_empty() {
return Ok(vec![Replicated::ZERO; B]);
}
let shuffled = shuffle_inputs(ctx.narrow(&Step::Shuffle), input_rows).await?;

// Apply DP padding for OPRF
let padded_input_rows = apply_dp_padding::<_, OPRFIPAInputRow<BK, TV, TS>, B>(
ctx.narrow(&Step::PaddingDp),
input_rows,
dp_padding_params,
)
.await?;

let shuffled = shuffle_inputs(ctx.narrow(&Step::Shuffle), padded_input_rows).await?;
let mut prfd_inputs = compute_prf_for_inputs(ctx.clone(), &shuffled).await?;

prfd_inputs.sort_by(|a, b| a.prf_of_match_key.cmp(&b.prf_of_match_key));
Expand Down Expand Up @@ -376,7 +389,10 @@ pub mod tests {
U128Conversions,
},
helpers::query::DpMechanism,
protocol::{dp::NoiseParams, ipa_prf::oprf_ipa},
protocol::{
dp::NoiseParams,
ipa_prf::{oprf_ipa, oprf_padding::PaddingParameters},
},
test_executor::run,
test_fixture::{ipa::TestRawDataRecord, Reconstruct, Runner, TestWorld},
};
Expand Down Expand Up @@ -410,14 +426,22 @@ pub mod tests {
test_input(10, 12345, true, 0, 5),
test_input(0, 68362, false, 1, 0),
test_input(20, 68362, true, 0, 2),
];
]; // trigger value of 2 attributes to earlier source row with breakdown 1 and trigger
// value of 5 attributes to source row with breakdown 2.
let dp_params = DpMechanism::NoDp;
let padding_params = PaddingParameters::relaxed();

let mut result: Vec<_> = world
.semi_honest(records.into_iter(), |ctx, input_rows| async move {
oprf_ipa::<BA5, BA3, BA16, BA20, 5, 32>(ctx, input_rows, None, dp_params)
.await
.unwrap()
oprf_ipa::<BA5, BA3, BA16, BA20, 5, 32>(
ctx,
input_rows,
None,
dp_params,
padding_params,
)
.await
.unwrap()
})
.await
.reconstruct();
Expand All @@ -432,6 +456,8 @@ pub mod tests {
#[test]
fn semi_honest_with_dp() {
const SS_BITS: usize = 1;
// setting SS_BITS this small will cause clipping in capping
// since per_user_credit_cap == 2^SS_BITS
semi_honest_with_dp_internal::<SS_BITS>();
}
#[test]
Expand All @@ -451,6 +477,7 @@ pub mod tests {
let epsilon = 10.0;
let dp_params = DpMechanism::Binomial { epsilon };
let per_user_credit_cap = 2_f64.powi(i32::try_from(SS_BITS).unwrap());
let padding_params = PaddingParameters::relaxed();
let world = TestWorld::default();

let records: Vec<TestRawDataRecord> = vec![
Expand All @@ -462,9 +489,15 @@ pub mod tests {
];
let mut result: Vec<_> = world
.semi_honest(records.into_iter(), |ctx, input_rows| async move {
oprf_ipa::<BA5, BA3, BA16, BA20, SS_BITS, B>(ctx, input_rows, None, dp_params)
.await
.unwrap()
oprf_ipa::<BA5, BA3, BA16, BA20, SS_BITS, B>(
ctx,
input_rows,
None,
dp_params,
padding_params,
)
.await
.unwrap()
})
.await
.reconstruct();
Expand Down Expand Up @@ -513,12 +546,19 @@ pub mod tests {

let records: Vec<TestRawDataRecord> = vec![];
let dp_params = DpMechanism::NoDp;
let padding_params = PaddingParameters::no_padding();

let mut result: Vec<_> = world
.semi_honest(records.into_iter(), |ctx, input_rows| async move {
oprf_ipa::<BA5, BA3, BA8, BA20, 5, 32>(ctx, input_rows, None, dp_params)
.await
.unwrap()
oprf_ipa::<BA5, BA3, BA8, BA20, 5, 32>(
ctx,
input_rows,
None,
dp_params,
padding_params,
)
.await
.unwrap()
})
.await
.reconstruct();
Expand All @@ -542,12 +582,19 @@ pub mod tests {
test_input(0, 68362, false, 1, 0),
];
let dp_params = DpMechanism::NoDp;
let padding_params = PaddingParameters::no_padding();

let mut result: Vec<_> = world
.semi_honest(records.into_iter(), |ctx, input_rows| async move {
oprf_ipa::<BA5, BA3, BA8, BA20, 5, 32>(ctx, input_rows, None, dp_params)
.await
.unwrap()
oprf_ipa::<BA5, BA3, BA8, BA20, 5, 32>(
ctx,
input_rows,
None,
dp_params,
padding_params,
)
.await
.unwrap()
})
.await
.reconstruct();
Expand Down Expand Up @@ -590,11 +637,18 @@ pub mod tests {

records.shuffle(&mut thread_rng());
let dp_params = DpMechanism::NoDp;
let padding_params = PaddingParameters::no_padding();
let mut result: Vec<_> = world
.semi_honest(records.into_iter(), |ctx, input_rows| async move {
oprf_ipa::<BA8, BA3, BA16, BA20, 5, 256>(ctx, input_rows, None, dp_params)
.await
.unwrap()
oprf_ipa::<BA8, BA3, BA16, BA20, 5, 256>(
ctx,
input_rows,
None,
dp_params,
padding_params,
)
.await
.unwrap()
})
.await
.reconstruct();
Expand Down
26 changes: 24 additions & 2 deletions ipa-core/src/protocol/ipa_prf/oprf_padding/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,9 @@ The process of drawing a sample from a Truncated Double Geometric will be done b
4. We will use rejection sampling from a double geometric to sample from a truncated double geometric.

### Sampling from the Geometric Distribution
We take the Geometric Distribution to be the probability distribution of the number of failures of Bernoulli trials before the first success, supported on the set $\{0,1,2,...\}$, with $0 < p \leq 1$ the success probability of the Bernoulli trials. <!-- The mean of the geometric is $\mu = \frac{1-p}{p}$ and variance is $\sigma^2 = \frac{1-p}{p^2}$. -->
We take the Geometric Distribution to be the probability distribution of the number of failures of Bernoulli trials before the first success, supported on the set $\{0,1,2,...\}$, with $0 < p \leq 1$ the success probability of the Bernoulli trials.

The mean of the geometric is $\mu = \frac{1-p}{p}$ and variance is $\sigma^2 = \frac{1-p}{p^2}$.

### Sampling from the Double Geometric Distribution
We use the following from this [book](https://www.researchgate.net/publication/258697410_The_Laplace_Distribution_and_Generalizations) page 159.
Expand All @@ -56,7 +58,27 @@ $Y=\theta + X_1 - X_2$
where $X_1$ and $X_2$ are iid geometric variables with success probability $p = 1 - e^{-1/s}$. We use this relation to sample from the double geometric by first drawing two independent samples from $X_1$ and $X_2$ and then computing their difference plus the shift by $\theta$.


<!-- The variance of a double geometric is the sum of the variances of the two independent geometrics, $X_1$ and $X_2$, so is $2 (\frac{1-p}{p^2})$ -->
The variance of a double geometric is the sum of the variances of the two independent geometrics, $X_1$ and $X_2$, so it is $2 \cdot \frac{1-p}{p^2}$.

### Samples from the Truncated Double Geometric Distribution
Once we can draw samples from a double geometric, we can sample from our desired truncated double geometric by sampling the double geometric with rejection if the sample lies outside the support set $\{0,...,2n\}$.

The variance of a truncated double geometric distribution is (TODO), but the variance is always less than the variance of the underlying (non-truncated) double geometric distribution.

# Padding Breakdown Keys for Reveal-Based Aggregation
A new aggregation protocol reveals the breakdown keys in the clear before aggregating the associated secret
shared values. This leaks the number of records for each breakdown key. We can assume that there is a cap
enforced on the number of records for any one matchkey in IPA. Using this sensitivity we can then (with a desired epsilon,
delta) generate a random padding number of dummy rows with each breakdown key.

# Generating Padding for Matchkeys and Breakdown keys together
1. The first approach would be to add the fake breakdown keys to the fake rows already being generated for fake matchkeys. But this
approach has a couple of challenges:
1. We shouldn't add any fake breakdown keys to fake matchkey rows when the matchkey is being added with cardinality
equal to one, because these rows can be dropped after matching and never have the fake breakdowns revealed.
2. There may need to be some adjustment made to the DP parameters achieved.
3. We should not be adding fake breakdown keys to matchkeys that have a cardinality larger than the cap we have established
for the number of breakdowns per user. Otherwise, those breakdown keys would never be revealed as they will be dropped.
2. The second approach we could consider is to add the fake rows for matchkey padding at the start of the protocol and then later
right before Breakdown Reveal Aggregation add the fake rows for breakdown key padding. This approach has the benefit of being more
efficient in that we do not need to compute the OPRF of these fake rows which are added just-in-time for use in aggregation.
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ impl Distribution<i32> for DoubleGeometric {
/// Truncated Double Geometric distribution.
#[derive(Debug, PartialEq)]
pub struct TruncatedDoubleGeometric {
shift_doubled: u32, // move 2 * shift to constructor instead of sample
pub shift_doubled: u32, // move 2 * shift to constructor instead of sample
double_geometric: DoubleGeometric,
}

Expand Down
Loading

0 comments on commit 4a881f6

Please sign in to comment.