Skip to content

Commit

Permalink
Add keyword processing callback (#89)
Browse files Browse the repository at this point in the history
  • Loading branch information
fboemer authored Sep 3, 2024
1 parent 9a7b256 commit 466b67c
Show file tree
Hide file tree
Showing 6 changed files with 133 additions and 20 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -278,9 +278,30 @@ struct KeywordPirBenchmarkContext<IndexServer: IndexPirServer, IndexClient: Inde
unevenDimensions: true,
keyCompression: keyCompression)

/// Prints progress information for events reported while the keyword database is processed.
///
/// Cuckoo table lifecycle events print a summary of the table. Insertion events print the
/// progress roughly every `reportingPercentage` percent of `databaseCount`.
/// - Parameter event: Event reported during shard processing.
/// - Throws: Error upon failure to summarize the cuckoo table.
func logEvent(event: ProcessKeywordDatabase.ProcessShardEvent) throws {
    switch event {
    case let .cuckooTableEvent(.createdTable(table)):
        let summary = try table.summarize()
        print("Created cuckoo table \(summary)")
    case let .cuckooTableEvent(.expandingTable(table)):
        let summary = try table.summarize()
        print("Expanding cuckoo table \(summary)")
    case let .cuckooTableEvent(.finishedExpandingTable(table)):
        let summary = try table.summarize()
        print("Finished expanding cuckoo table \(summary)")
    case let .cuckooTableEvent(.insertedKeywordValuePair(index, _)):
        let reportingPercentage = 10
        // Clamp to 1: for databases with fewer than `reportingPercentage` rows the integer
        // division yields 0, and no positive count is a multiple of 0, so nothing was reported.
        let reportingInterval = max(1, databaseCount / reportingPercentage)
        let insertedCount = index + 1
        if insertedCount.isMultiple(of: reportingInterval) {
            // Derive the percentage from the true total rather than the truncated interval,
            // so it stays within 0...100 when `databaseCount` is not a multiple of 10.
            let percentage = 100 * Float(insertedCount) / Float(databaseCount)
            print("Inserted \(insertedCount) / \(databaseCount) keywords \(percentage)%")
        }
    }
}

let processed = try Server.process(database: rows,
config: config,
with: context)
with: context, onEvent: logEvent)

self.server = try Server(context: context, processed: processed)
self.client = Client(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,11 +78,31 @@ cat /tmp/database-v1.txtpb /tmp/database-update.txtpb > /tmp/database-v2.txtpb
To ensure processing the updated database yields the same configuration, we use the `.fixedSize` cuckoo table argument, specifying a bucket count.
A larger bucket count will leave more room for new entries, without changing the configuration.
However, a larger bucket count will also increase server runtime.
One way to choose the `bucketCount` is to start with `bucketCount : 1` and try larger `bucketCounts` until the processing works.
If the processing throws a `PirError.failedToConstructCuckooTable` or logs `Failed to construct Cuckoo table`, this is an indication the chosen bucket count was too small.

We create `/tmp/config-v1-fixed-size.json` with the following contents
There are a few ways to find a good `bucketCount`:
* Start with a small bucket count.
If the processing throws a `PirError.failedToConstructCuckooTable` or logs `Failed to construct Cuckoo table`, this is an indication the chosen bucket count was too small.
Choose larger `bucketCounts` until the processing works.

* Add a callback to [ProcessKeywordDatabase.processShard](https://swiftpackageindex.com/apple/swift-homomorphic-encryption/main/documentation/privateinformationretrieval/processkeyworddatabase/processshard(shard:with:)).
This callback can be used to report the `bucketCount` after the cuckoo table was created.
A sample callback is:
```swift
/// Sample callback that reports the bucket count of the cuckoo table once it is created.
func onEvent(event: ProcessKeywordDatabase.ProcessShardEvent) throws {
    switch event {
    case let .cuckooTableEvent(.createdTable(table)):
        let summary = try table.summarize()
        // Report the bucket count, e.g., to reuse it as the `.fixedSize` bucket count.
        print("Cuckoo table bucketCount: \(summary.bucketCount)")
    default:
        // Other events are not needed to determine the bucket count.
        break
    }
}
```

For our example, we use `bucketCount: 256`.

We create `/tmp/config-v1-fixed-size.json` with the following contents
```json
{
"algorithm" : "mulPir",
"cuckooTableArguments" : {
Expand Down
29 changes: 27 additions & 2 deletions Sources/PIRProcessDatabase/ProcessDatabase.swift
Original file line number Diff line number Diff line change
Expand Up @@ -347,8 +347,33 @@ struct ProcessDatabase: ParsableCommand {
for (shardID, shard) in keywordDatabase.shards
.sorted(by: { $0.0.localizedStandardCompare($1.0) == .orderedAscending })
{
ProcessDatabase.logger.info("Processing shard \(shardID)")
let processed = try ProcessKeywordDatabase.processShard(shard: shard, with: processArgs)
/// Logs progress information for events reported while the current shard is processed.
///
/// Cuckoo table lifecycle events log a summary of the table. Insertion events log the
/// progress roughly every `reportingPercentage` percent of the shard's rows.
/// - Parameter event: Event reported during shard processing.
/// - Throws: Error upon failure to summarize the cuckoo table.
func logEvent(event: ProcessKeywordDatabase.ProcessShardEvent) throws {
    switch event {
    case let .cuckooTableEvent(.createdTable(table)):
        let summary = try table.summarize()
        ProcessDatabase.logger.info("Created cuckoo table \(summary)")
    case let .cuckooTableEvent(.expandingTable(table)):
        let summary = try table.summarize()
        ProcessDatabase.logger.info("Expanding cuckoo table \(summary)")
    case let .cuckooTableEvent(.finishedExpandingTable(table)):
        let summary = try table.summarize()
        ProcessDatabase.logger.info("Finished expanding cuckoo table \(summary)")
    case let .cuckooTableEvent(.insertedKeywordValuePair(index, _)):
        let reportingPercentage = 10
        let rowCount = shard.rows.count
        // Clamp to 1: for shards with fewer than `reportingPercentage` rows the integer
        // division yields 0, and no positive count is a multiple of 0, so nothing was logged.
        let reportingInterval = max(1, rowCount / reportingPercentage)
        let insertedCount = index + 1
        if insertedCount.isMultiple(of: reportingInterval) {
            // Derive the percentage from the true row count rather than the truncated interval,
            // so it stays within 0...100 when the row count is not a multiple of 10.
            let percentage = 100 * Float(insertedCount) / Float(rowCount)
            ProcessDatabase.logger
                .info("Inserted \(insertedCount) / \(rowCount) keywords \(percentage)%")
        }
    }
}

ProcessDatabase.logger.info("Processing shard \(shardID) with \(shard.rows.count) rows")
let processed = try ProcessKeywordDatabase.processShard(
shard: shard,
with: processArgs,
onEvent: logEvent)
if config.trialsPerShard > 0 {
guard let row = shard.rows.first else {
throw PirError.emptyDatabase
Expand Down
50 changes: 40 additions & 10 deletions Sources/PrivateInformationRetrieval/CuckooTable.swift
Original file line number Diff line number Diff line change
Expand Up @@ -223,14 +223,20 @@ extension CuckooBucket: RangeReplaceableCollection {

/// A Cuckoo table is a data structure that stores a set of keyword-value pairs, using cuckoo hashing to resolve
/// conflicts.
@usableFromInline
struct CuckooTable {
public struct CuckooTable {
typealias KeywordHash = UInt64
struct CuckooTableInformation: Equatable {
let entryCount: Int
let bucketCount: Int
let emptyBucketCount: Int
let loadFactor: Float
/// Information about the cuckoo table.
public struct CuckooTableInformation: Equatable {
/// The number of entries stored in the table.
public let entryCount: Int
/// The number of cuckoo buckets in the table.
public let bucketCount: Int
/// The number of empty buckets.
public let emptyBucketCount: Int
/// The fraction of capacity that is occupied.
///
/// A small load factor indicates there is unused capacity in the table.
public let loadFactor: Float
}

@usableFromInline
Expand All @@ -245,8 +251,22 @@ struct CuckooTable {
}
}

@usableFromInline let config: CuckooTableConfig
/// Events happening in a ``CuckooTable``.
///
/// Each event is passed to the `onEvent` callback supplied when the table is created.
public enum Event {
/// The table was initialized.
///
/// Sent at the end of initialization, after the keyword-value pairs of the database were inserted.
/// The associated value is the created table.
case createdTable(CuckooTable)
/// The table is being expanded.
///
/// Sent before the expansion starts; the associated value is the table prior to expansion.
case expandingTable(CuckooTable)
/// The table is done expanding.
///
/// The associated value is the table after expansion.
case finishedExpandingTable(CuckooTable)
/// A keyword-value pair was inserted.
///
/// `index` is the zero-based position of the pair in the enumeration of the database, so
/// `index + 1` pairs have been inserted when this event is sent.
case insertedKeywordValuePair(index: Int, KeywordValuePair)
}

/// Configuration used to create the table.
public let config: CuckooTableConfig
@usableFromInline var buckets: [CuckooBucket]
@usableFromInline let onEvent: (Event) throws -> Void
@usableFromInline var rng: RandomNumberGenerator

@usableFromInline var entryCount: Int {
Expand All @@ -259,23 +279,27 @@ struct CuckooTable {
/// Creates a cuckoo table from a collection of keyword and value tuples.
///
/// Each tuple is converted to a `KeywordValuePair` before delegating to the initializer that
/// takes a collection of `KeywordValuePair`.
/// - Parameters:
///   - config: Configuration for the cuckoo table.
///   - database: Collection of (keyword, value) tuples to insert.
///   - onEvent: Function to call when an ``Event`` happens. Defaults to doing nothing.
///   - rng: Random number generator to use.
/// - Throws: Error upon failure to create the table.
init(
    config: CuckooTableConfig,
    database: some Collection<(KeywordValuePair.Keyword, KeywordValuePair.Value)>,
    onEvent: @escaping (Event) throws -> Void = { _ in },
    using rng: RandomNumberGenerator = SystemRandomNumberGenerator()) throws
{
    let keywordValuePairs = database.map { entry in
        KeywordValuePair(keyword: entry.0, value: entry.1)
    }
    try self.init(config: config, database: keywordValuePairs, onEvent: onEvent, using: rng)
}

@inlinable
init(
config: CuckooTableConfig,
database: some Collection<KeywordValuePair>,
onEvent: @escaping (Event) throws -> Void = { _ in },
using rng: RandomNumberGenerator = SystemRandomNumberGenerator()) throws
{
self.config = config
let targetBucketCount: Int
self.buckets = []
self.onEvent = onEvent
self.rng = rng
switch config.bucketCount {
case let .allowExpansion(_, targetLoadFactor: targetLoadFactor):
Expand All @@ -290,12 +314,16 @@ struct CuckooTable {
}
self.buckets = Array(repeating: CuckooBucket(), count: targetBucketCount)

for keywordValuePair in database {
for (index, keywordValuePair) in database.enumerated() {
try insert(keywordValuePair)
try onEvent(Event.insertedKeywordValuePair(index: index, keywordValuePair))
}
try onEvent(Event.createdTable(self))
}

func summarize() throws -> CuckooTableInformation {
/// Creates a summary of the cuckoo table.
/// - Throws: Error upon failure to summarize the table.
public func summarize() throws -> CuckooTableInformation {
let bucketEntryCounts = buckets.map(\.count)
let emptyBucketCount: Int = bucketEntryCounts.map { entryCount in entryCount == 0 ? 1 : 0 }.sum()
let entryCount: Int = bucketEntryCounts.sum()
Expand Down Expand Up @@ -403,6 +431,7 @@ struct CuckooTable {
mutating func expand() throws {
switch config.bucketCount {
case let .allowExpansion(expansionFactor: expansionFactor, _):
try onEvent(Event.expandingTable(self))
let oldTable = buckets
let bucketCount = Int(ceil(Double(buckets.count) * expansionFactor)).nextMultiple(
of: tableCount,
Expand All @@ -417,6 +446,7 @@ struct CuckooTable {
}
}
}
try onEvent(Event.finishedExpandingTable(self))
default:
throw PirError
.failedToConstructCuckooTable(
Expand Down
13 changes: 11 additions & 2 deletions Sources/PrivateInformationRetrieval/KeywordDatabase.swift
Original file line number Diff line number Diff line change
Expand Up @@ -365,15 +365,24 @@ public enum ProcessKeywordDatabase {
}
}

/// Events happening during shard processing.
///
/// Events are delivered to the `onEvent` callback passed to the shard processing function.
public enum ProcessShardEvent {
/// A ``CuckooTable`` event.
///
/// Wraps an event reported by the cuckoo table that is built while processing the shard.
case cuckooTableEvent(CuckooTable.Event)
}

/// Processes a database shard.
/// - Parameters:
/// - shard: Shard of a keyword database.
/// - arguments: Processing arguments.
/// - onEvent: Function to call when a ``ProcessShardEvent`` happens.
/// - Returns: The processed database.
/// - Throws: Error upon failure to process the shard.
@inlinable
public static func processShard<Scheme: HeScheme>(shard: KeywordDatabaseShard,
with arguments: Arguments<Scheme>) throws
with arguments: Arguments<Scheme>,
onEvent: @escaping (ProcessShardEvent) throws -> Void = { _ in
}) throws
-> ProcessedDatabaseWithParameters<Scheme>
{
let keywordConfig = arguments.databaseConfig.keywordPirConfig
Expand All @@ -383,7 +392,7 @@ public enum ProcessKeywordDatabase {
}
return try KeywordPirServer<MulPirServer<Scheme>>.process(database: shard,
config: keywordConfig,
with: context)
with: context, onEvent: onEvent)
}

/// Validates the correctness of processing on a shard.
Expand Down
12 changes: 10 additions & 2 deletions Sources/PrivateInformationRetrieval/KeywordPirProtocol.swift
Original file line number Diff line number Diff line change
Expand Up @@ -148,16 +148,24 @@ public final class KeywordPirServer<PirServer: IndexPirServer>: KeywordPirProtoc
/// - database: Collection of database entries.
/// - config: Keyword PIR configuration.
/// - context: Context for HE computation.
/// - onEvent: Function to call when a ``ProcessKeywordDatabase.ProcessShardEvent`` happens.
/// - Returns: A processed database.
/// - Throws: Error upon failure to process the database.
@inlinable
public static func process(database: some Collection<KeywordValuePair>,
config: KeywordPirConfig,
with context: Context<Scheme>)
with context: Context<Scheme>,
onEvent: @escaping (ProcessKeywordDatabase.ProcessShardEvent) throws -> Void = { _ in })
throws -> ProcessedDatabaseWithParameters<Scheme>
{
// Wraps each cuckoo table event in a `ProcessShardEvent` and forwards it to the caller's callback.
func onCuckooEvent(event: CuckooTable.Event) throws {
    let shardEvent: ProcessKeywordDatabase.ProcessShardEvent = .cuckooTableEvent(event)
    try onEvent(shardEvent)
}

let cuckooTableConfig = config.cuckooTableConfig
let cuckooTable = try CuckooTable(config: cuckooTableConfig, database: database)
let cuckooTable = try CuckooTable(config: cuckooTableConfig, database: database, onEvent: onCuckooEvent)
let entryTable = try cuckooTable.serializeBuckets()
let maxEntrySize: Int
if config.useMaxSerializedBucketSize {
Expand Down

0 comments on commit 466b67c

Please sign in to comment.