Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DP Padding #1225

Merged
merged 19 commits into from
Aug 27, 2024
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ipa-core/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ track_steps!(
prf_sharding::step,
shuffle::step,
aggregation::step,
oprf_padding::step,
step,
},
dp::step,
Expand Down
2 changes: 2 additions & 0 deletions ipa-core/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ pub enum Error {
DZKPMasks,
#[error("Attempt to operate on zero records")]
ZeroRecords,
#[error("DP related error: {0}")]
DPPaddingError(String),
#[error("Epsilon submitted to query is out of bounds")]
EpsilonOutOfBounds,
#[error("Missing total records in {0}")]
Expand Down
4 changes: 3 additions & 1 deletion ipa-core/src/protocol/context/prss.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
//! Metric-aware PRSS decorators

use generic_array::{ArrayLength, GenericArray};
use rand_core::{Error, RngCore};
use rand_core::{CryptoRng, Error, RngCore};

use crate::{
helpers::{Direction, Role},
Expand Down Expand Up @@ -145,3 +145,5 @@ impl RngCore for InstrumentedSequentialSharedRandomness<'_> {
self.inner.try_fill_bytes(dest)
}
}

// Marker impl: declares the metric-aware sequential PRSS wrapper to be a `CryptoRng`.
// The trait has no methods, so this adds no behavior beyond the `RngCore` impl above.
// NOTE(review): presumably required so this RNG can be passed to APIs bounded on
// `RngCore + CryptoRng` — confirm against the callers.
impl CryptoRng for InstrumentedSequentialSharedRandomness<'_> {}
19 changes: 16 additions & 3 deletions ipa-core/src/protocol/ipa_prf/aggregation/breakdown_reveal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,11 @@ use crate::{
basics::semi_honest_reveal,
context::Context,
ipa_prf::{
aggregation::step::AggregationStep, prf_sharding::SecretSharedAttributionOutputs,
shuffle::shuffle_attribution_outputs, BreakdownKey,
aggregation::step::AggregationStep,
oprf_padding::{apply_dp_padding, PaddingParameters},
prf_sharding::{AttributionOutputs, SecretSharedAttributionOutputs},
shuffle::shuffle_attribution_outputs,
BreakdownKey,
},
BooleanProtocols, RecordId,
},
Expand Down Expand Up @@ -59,7 +62,17 @@ where
BitDecomposed<Replicated<Boolean, B>>:
for<'a> TransposeFrom<&'a [Replicated<TV>; B], Error = Infallible>,
{
let atributions = shuffle_attributions(&ctx, attributed_values).await?;
let dp_padding_params = PaddingParameters::relaxed();
// Apply DP padding for Breakdown Reveal Aggregation
let attributed_values_padded =
apply_dp_padding::<_, AttributionOutputs<Replicated<BK>, Replicated<TV>>, B>(
ctx.narrow(&AggregationStep::PaddingDp),
attributed_values,
dp_padding_params,
)
.await?;

let atributions = shuffle_attributions(&ctx, attributed_values_padded).await?;
let grouped_tvs = reveal_breakdowns(&ctx, atributions).await?;
let num_rows = grouped_tvs.max_len;
aggregate_values::<_, HV, B>(ctx, grouped_tvs.into_stream(), num_rows).await
Expand Down
2 changes: 2 additions & 0 deletions ipa-core/src/protocol/ipa_prf/aggregation/step.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ pub(crate) enum AggregationStep {
/// key. Aggregation based on move to bucket approach does not need them.
/// When reveal-based aggregation is the default, other steps (such as `MoveToBucket`)
/// should be deleted
#[step(child = crate::protocol::ipa_prf::oprf_padding::step::PaddingDpStep, name="padding_dp")]
PaddingDp,
#[step(child = crate::protocol::ipa_prf::shuffle::step::OPRFShuffleStep)]
Shuffle,
RevealStep,
Expand Down
92 changes: 73 additions & 19 deletions ipa-core/src/protocol/ipa_prf/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ use crate::{
},
ipa_prf::{
boolean_ops::convert_to_fp25519,
oprf_padding::apply_dp_padding,
prf_eval::{eval_dy_prf, gen_prf_key},
prf_sharding::{
attribute_cap_aggregate, histograms_ranges_sortkeys, PrfShardedIpaInputRow,
Expand Down Expand Up @@ -91,7 +92,9 @@ use step::IpaPrfStep as Step;

use crate::{
helpers::query::DpMechanism,
protocol::{context::Validator, dp::dp_for_histogram},
protocol::{
context::Validator, dp::dp_for_histogram, ipa_prf::oprf_padding::PaddingParameters,
},
};

#[derive(Clone, Debug, Default)]
Expand Down Expand Up @@ -218,6 +221,7 @@ pub async fn oprf_ipa<'ctx, BK, TV, HV, TS, const SS_BITS: usize, const B: usize
input_rows: Vec<OPRFIPAInputRow<BK, TV, TS>>,
attribution_window_seconds: Option<NonZeroU32>,
dp_params: DpMechanism,
dp_padding_params: PaddingParameters,
) -> Result<Vec<Replicated<HV>>, Error>
where
BK: BreakdownKey<B>,
Expand Down Expand Up @@ -247,7 +251,16 @@ where
if input_rows.is_empty() {
return Ok(vec![Replicated::ZERO; B]);
}
let shuffled = shuffle_inputs(ctx.narrow(&Step::Shuffle), input_rows).await?;

// Apply DP padding for OPRF
let padded_input_rows = apply_dp_padding::<_, OPRFIPAInputRow<BK, TV, TS>, B>(
ctx.narrow(&Step::PaddingDp),
input_rows,
dp_padding_params,
)
.await?;

let shuffled = shuffle_inputs(ctx.narrow(&Step::Shuffle), padded_input_rows).await?;
let mut prfd_inputs = compute_prf_for_inputs(ctx.clone(), &shuffled).await?;

prfd_inputs.sort_by(|a, b| a.prf_of_match_key.cmp(&b.prf_of_match_key));
Expand Down Expand Up @@ -376,7 +389,10 @@ pub mod tests {
U128Conversions,
},
helpers::query::DpMechanism,
protocol::{dp::NoiseParams, ipa_prf::oprf_ipa},
protocol::{
dp::NoiseParams,
ipa_prf::{oprf_ipa, oprf_padding::PaddingParameters},
},
test_executor::run,
test_fixture::{ipa::TestRawDataRecord, Reconstruct, Runner, TestWorld},
};
Expand Down Expand Up @@ -410,14 +426,22 @@ pub mod tests {
test_input(10, 12345, true, 0, 5),
test_input(0, 68362, false, 1, 0),
test_input(20, 68362, true, 0, 2),
];
]; // trigger value of 2 attributes to earlier source row with breakdown 1 and trigger
// value of 5 attributes to source row with breakdown 2.
let dp_params = DpMechanism::NoDp;
let padding_params = PaddingParameters::relaxed();

let mut result: Vec<_> = world
.semi_honest(records.into_iter(), |ctx, input_rows| async move {
oprf_ipa::<BA5, BA3, BA16, BA20, 5, 32>(ctx, input_rows, None, dp_params)
.await
.unwrap()
oprf_ipa::<BA5, BA3, BA16, BA20, 5, 32>(
ctx,
input_rows,
None,
dp_params,
padding_params,
)
.await
.unwrap()
})
.await
.reconstruct();
Expand All @@ -432,6 +456,8 @@ pub mod tests {
/// Runs `semi_honest_with_dp_internal` with `SS_BITS = 1`.
#[test]
fn semi_honest_with_dp() {
const SS_BITS: usize = 1;
// setting SS_BITS this small will cause clipping in capping
// since per_user_credit_cap == 2^SS_BITS
semi_honest_with_dp_internal::<SS_BITS>();
}
#[test]
Expand All @@ -451,6 +477,7 @@ pub mod tests {
let epsilon = 10.0;
let dp_params = DpMechanism::Binomial { epsilon };
let per_user_credit_cap = 2_f64.powi(i32::try_from(SS_BITS).unwrap());
let padding_params = PaddingParameters::relaxed();
let world = TestWorld::default();

let records: Vec<TestRawDataRecord> = vec![
Expand All @@ -462,9 +489,15 @@ pub mod tests {
];
let mut result: Vec<_> = world
.semi_honest(records.into_iter(), |ctx, input_rows| async move {
oprf_ipa::<BA5, BA3, BA16, BA20, SS_BITS, B>(ctx, input_rows, None, dp_params)
.await
.unwrap()
oprf_ipa::<BA5, BA3, BA16, BA20, SS_BITS, B>(
ctx,
input_rows,
None,
dp_params,
padding_params,
)
.await
.unwrap()
})
.await
.reconstruct();
Expand Down Expand Up @@ -513,12 +546,19 @@ pub mod tests {

let records: Vec<TestRawDataRecord> = vec![];
let dp_params = DpMechanism::NoDp;
let padding_params = PaddingParameters::no_padding();

let mut result: Vec<_> = world
.semi_honest(records.into_iter(), |ctx, input_rows| async move {
oprf_ipa::<BA5, BA3, BA8, BA20, 5, 32>(ctx, input_rows, None, dp_params)
.await
.unwrap()
oprf_ipa::<BA5, BA3, BA8, BA20, 5, 32>(
ctx,
input_rows,
None,
dp_params,
padding_params,
)
.await
.unwrap()
})
.await
.reconstruct();
Expand All @@ -542,12 +582,19 @@ pub mod tests {
test_input(0, 68362, false, 1, 0),
];
let dp_params = DpMechanism::NoDp;
let padding_params = PaddingParameters::no_padding();

let mut result: Vec<_> = world
.semi_honest(records.into_iter(), |ctx, input_rows| async move {
oprf_ipa::<BA5, BA3, BA8, BA20, 5, 32>(ctx, input_rows, None, dp_params)
.await
.unwrap()
oprf_ipa::<BA5, BA3, BA8, BA20, 5, 32>(
ctx,
input_rows,
None,
dp_params,
padding_params,
)
.await
.unwrap()
})
.await
.reconstruct();
Expand Down Expand Up @@ -590,11 +637,18 @@ pub mod tests {

records.shuffle(&mut thread_rng());
let dp_params = DpMechanism::NoDp;
let padding_params = PaddingParameters::no_padding();
let mut result: Vec<_> = world
.semi_honest(records.into_iter(), |ctx, input_rows| async move {
oprf_ipa::<BA8, BA3, BA16, BA20, 5, 256>(ctx, input_rows, None, dp_params)
.await
.unwrap()
oprf_ipa::<BA8, BA3, BA16, BA20, 5, 256>(
ctx,
input_rows,
None,
dp_params,
padding_params,
)
.await
.unwrap()
})
.await
.reconstruct();
Expand Down
26 changes: 24 additions & 2 deletions ipa-core/src/protocol/ipa_prf/oprf_padding/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,9 @@ The process of drawing a sample from a Truncated Double Geometric will be done b
4. We will use rejection sampling from a double geometric to sample from a truncated double geometric.

### Sampling from the Geometric Distribution
We take the Geometric Distribution to be the probability distribution of the number of failures of Bernoulli trials before the first success, supported on the set $\{0,1,2,...\}$, with $0 < p \leq 1$ the success probability of the Bernoulli trials. <!-- The mean of the geometric is $\mu = \frac{1-p}{p}$ and variance is $\sigma^2 = \frac{1-p}{p^2}$. -->
We take the Geometric Distribution to be the probability distribution of the number of failures of Bernoulli trials before the first success, supported on the set $\{0,1,2,...\}$, with $0 < p \leq 1$ the success probability of the Bernoulli trials.

The mean of the geometric is $\mu = \frac{1-p}{p}$ and variance is $\sigma^2 = \frac{1-p}{p^2}$.

### Sampling from the Double Geometric Distribution
We use the following from this [book](https://www.researchgate.net/publication/258697410_The_Laplace_Distribution_and_Generalizations) page 159.
Expand All @@ -56,7 +58,27 @@ $Y=\theta + X_1 - X_2$
where $X_1$ and $X_2$ are iid geometric variables with success probability $p = 1 - e^{-1/s}$. We use this relation to sample from the double geometric by first drawing two independent samples from $X_1$ and $X_2$ and then computing their difference plus the shift by $\theta$.


<!-- The variance of a double geometric is the sum of the variances of the two independent geometrics, $X_1$ and $X_2$, so is $2 (\frac{1-p}{p^2})$ -->
The variance of a double geometric is the sum of the variances of the two independent geometrics, $X_1$ and $X_2$, so it is $2 \cdot \frac{1-p}{p^2}$.

### Samples from the Truncated Double Geometric Distribution
Once we can draw samples from a double geometric, we can sample from our desired truncated double geometric by sampling the double geometric with rejection if the sample lies outside the support set $\{0,...,2n\}$.

The variance of a truncated double geometric distribution is (TODO), but the variance is always less than the variance of the underlying (non-truncated) double geometric distribution.

# Padding Breakdown Keys for Reveal-Based Aggregation
A new aggregation protocol reveals the breakdown keys in the clear before aggregating the associated secret
shared values. This leaks the number of records for each breakdown key. We can assume that there is a cap
enforced on the number of records for any one matchkey in IPA. Using this sensitivity we can then (with a desired epsilon,
delta) generate a random padding number of dummy rows with each breakdown key.

# Generating Padding for Matchkeys and Breakdown keys together
We need to add fake rows for matchkeys and fake rows for breakdown keys. It makes sense to try and add the fake breakdown
keys to the fake rows already being generated for fake matchkeys. But this approach has a couple of challenges:
1. We shouldn't add any fake breakdown keys to fake matchkey rows when the matchkey is being added with cardinality equal to one,
because these rows can be dropped after matching and never have the fake breakdowns revealed.
2. There may need to be some adjustment made to the DP parameters achieved. TODO
3. We should not be adding fake breakdown keys to matchkeys that have a cardinality larger than the cap we have established for
the number of breakdowns per user. Otherwise, those breakdown keys would never be revealed as they will be dropped.

Instead of this approach we will generate the fake rows for matchkey padding first and then the fake rows for breakdown key padding. When
we generate the fake rows for breakdown key padding, the fake matchkeys generated will all have cardinality two or three (and with small probability one).
bmcase marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ impl Distribution<i32> for DoubleGeometric {
/// Truncated Double Geometric distribution.
#[derive(Debug, PartialEq)]
pub struct TruncatedDoubleGeometric {
shift_doubled: u32, // move 2 * shift to constructor instead of sample
pub shift_doubled: u32, // move 2 * shift to constructor instead of sample
bmcase marked this conversation as resolved.
Show resolved Hide resolved
double_geometric: DoubleGeometric,
}

Expand Down
Loading
Loading