Skip to content

Commit

Permalink
Merge pull request #433 from dvadym/quantile_tree_merge
Browse files Browse the repository at this point in the history
Implement QuantileTree.Merge method
  • Loading branch information
chinmayshah99 authored Oct 29, 2022
2 parents a409a4b + 0750b67 commit ab98495
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 10 deletions.
33 changes: 23 additions & 10 deletions src/bindings/PyDP/algorithms/qunatile_tree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,21 +30,21 @@ std::unique_ptr<dp::QuantileTree<double>> CreateQuantileTree(double lower, doubl

dp::QuantileTree<double>::Privatized GetPrivatizeTree(
dp::QuantileTree<double>& tree, double epsilon, double delta,
int max_partitions_contributed_to, int max_contributions_per_partition,
int max_partitions_contributed, int max_contributions_per_partition,
const std::string& noise_type) {
dp::QuantileTree<double>::DPParams dp_params;
dp_params.epsilon = epsilon;
dp_params.delta = delta;
dp_params.max_contributions_per_partition = max_contributions_per_partition;
dp_params.max_partitions_contributed_to = max_partitions_contributed_to;
dp_params.max_partitions_contributed_to = max_partitions_contributed;
// Create DP mechanism.
if (noise_type == "laplace") {
dp_params.mechanism_builder = std::make_unique<dp::LaplaceMechanism::Builder>();
} else if (noise_type == "gaussian") {
dp_params.mechanism_builder = std::make_unique<dp::GaussianMechanism::Builder>();
} else {
throw py::value_error("noise_type can be 'laplace' or 'gaussian', but it is '" +
noise_type + "'./**/");
noise_type + "'.");
}
auto status_or_result = tree.MakePrivate(dp_params);
if (!status_or_result.ok()) {
Expand Down Expand Up @@ -89,15 +89,28 @@ void init_algorithms_quantile_tree(py::module& m) {
to_return.mutable_data()->PackFrom(obj.Serialize());
return to_return;
});
py_class.def("merge", &dp::QuantileTree<double>::Merge, py::arg("summary"));
// Python binding for QuantileTree.merge(summary).
//
// Unpacks the BoundedQuantilesSummary carried inside `summary` and merges it
// into `tree`.
//
// Throws std::runtime_error (which pybind11 translates to a Python
// RuntimeError) when:
//   * `summary` carries no data,
//   * the payload cannot be unpacked as a BoundedQuantilesSummary,
//   * the underlying Merge call reports a non-OK status.
py_class.def(
    "merge",
    [](dp::QuantileTree<double>& tree, const dp::Summary& summary) {
      if (!summary.has_data()) {
        throw std::runtime_error("Cannot merge summary, no data.");
      }

      dp::BoundedQuantilesSummary quantiles_summary;
      if (!summary.data().UnpackTo(&quantiles_summary)) {
        throw std::runtime_error("Failed to unpack data");
      }

      // Merge reports failure through its returned status; dropping it would
      // let a rejected merge look like a success to the Python caller.
      const auto merge_status = tree.Merge(quantiles_summary);
      if (!merge_status.ok()) {
        throw std::runtime_error("Cannot merge summary: " +
                                 std::string(merge_status.message()));
      }
    },
    py::arg("summary"));

py_class.def(
"compute_quantiles",
[](dp::QuantileTree<double>& tree, double epsilon, double delta,
int max_partitions_contributed_to, int max_contributions_per_partition,
int max_partitions_contributed, int max_contributions_per_partition,
const std::vector<double>& quantiles, const std::string& noise_type) {
dp::QuantileTree<double>::Privatized privatized_tree =
GetPrivatizeTree(tree, epsilon, delta, max_partitions_contributed_to,
GetPrivatizeTree(tree, epsilon, delta, max_partitions_contributed,
max_contributions_per_partition, noise_type);

std::vector<double> output;
Expand All @@ -110,18 +123,18 @@ void init_algorithms_quantile_tree(py::module& m) {
}
return output;
},
py::arg("epsilon"), py::arg("delta"), py::arg("max_partitions_contributed_to"),
py::arg("epsilon"), py::arg("delta"), py::arg("max_partitions_contributed"),
py::arg("max_contributions_per_partition"), py::arg("quantiles"),
py::arg("noise_type") = "laplace", "Compute multiple quantiles.");

py_class.def(
"compute_quantiles_and_confidence_intervals",
[](dp::QuantileTree<double>& tree, double epsilon, double delta,
int max_contributions_per_partition, int max_partitions_contributed_to,
int max_contributions_per_partition, int max_partitions_contributed,
const std::vector<double>& quantiles, double confidence_interval_level,
const std::string& noise_type) {
dp::QuantileTree<double>::Privatized privatized_tree =
GetPrivatizeTree(tree, epsilon, delta, max_partitions_contributed_to,
GetPrivatizeTree(tree, epsilon, delta, max_partitions_contributed,
max_contributions_per_partition, noise_type);

std::vector<QuantileConfidenceInterval> output;
Expand All @@ -142,7 +155,7 @@ void init_algorithms_quantile_tree(py::module& m) {
}
return output;
},
py::arg("epsilon"), py::arg("delta"), py::arg("max_partitions_contributed_to"),
py::arg("epsilon"), py::arg("delta"), py::arg("max_partitions_contributed"),
py::arg("max_contributions_per_partition"), py::arg("quantiles"),
py::arg("confidence_interval_level"), py::arg("noise_type") = "laplace",
"Compute multiple quantiles and confidence intervals for them.");
Expand Down
31 changes: 31 additions & 0 deletions tests/algorithms/test_quantile_tree.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pytest

import pydp._pydp as dp
from pydp.algorithms.quantile_tree import QuantileTree


Expand Down Expand Up @@ -70,3 +71,33 @@ def test_quantiles_and_confidence_intervals(self):
<= dp_quantile_ci.upper_bound
)
assert dp_quantile_ci.upper_bound - dp_quantile_ci.lower_bound < 0.01

def test_serialize_deserialize(self):
    """Round-trip a QuantileTree through serialize() and merge().

    Fills one tree with the integers 0..1000, serializes it to bytes, merges
    the bytes into a second, empty tree built with the same parameters, and
    checks that the second tree returns quantiles close to the exact ones.
    """
    lower, upper = 0, 1000
    height, branching_factor = 5, 10
    tree1 = QuantileTree(lower, upper, height, branching_factor)

    # Add elements 0, ..., 1000 to the tree.
    for i in range(1001):
        tree1.add_entry(i)

    serialized_tree = tree1.serialize().to_bytes()

    # Deserialize:
    # 1. Create an empty tree with the same parameters.
    tree2 = QuantileTree(lower, upper, height, branching_factor)
    # 2. Merge serialized_tree into tree2.
    tree2.merge(dp.bytes_to_summary(serialized_tree))

    # Check that tree2 computes correct quantiles. For this use high
    # epsilon, which means small noise and close to the real quantiles.
    eps, delta = 10000, 0
    # Every decile from 0.1 to 0.9 exactly once (0.8 was previously missing
    # because 0.9 was listed twice).
    quantiles_to_compute = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    dp_quantiles = tree2.compute_quantiles(
        eps, delta, 1, 1, quantiles_to_compute, "laplace"
    )

    # Check that DP quantiles are close to expected.
    for quantile, dp_quantile in zip(quantiles_to_compute, dp_quantiles):
        expected_quantile = quantile * upper
        assert abs(expected_quantile - dp_quantile) < 0.1

0 comments on commit ab98495

Please sign in to comment.