-
Notifications
You must be signed in to change notification settings - Fork 1.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
6 changed files
with
242 additions
and
56 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
use super::Filter; | ||
use crate::NippyJarError; | ||
use cuckoofilter::{self, CuckooFilter, ExportedCuckooFilter}; | ||
use serde::{Deserialize, Deserializer, Serialize, Serializer}; | ||
use std::collections::hash_map::DefaultHasher; | ||
|
||
/// [CuckooFilter](https://www.cs.cmu.edu/~dga/papers/cuckoo-conext2014.pdf). It builds and provides an approximated set-membership filter to answer queries such as "Does this element belong to this set?". Has a theoretical 3% false positive rate. | ||
pub struct Cuckoo { | ||
/// Remaining number of elements that can be added. | ||
/// | ||
/// This is necessary because the inner implementation will fail on adding an element past capacity, **but it will still add it and remove other**: [source](https://github.com/axiomhq/rust-cuckoofilter/tree/624da891bed1dd5d002c8fa92ce0dcd301975561#notes--todos) | ||
remaining: usize, | ||
|
||
/// CuckooFilter. | ||
filter: CuckooFilter<DefaultHasher>, // TODO does it need an actual hasher? | ||
} | ||
|
||
impl Cuckoo { | ||
pub fn new(max_capacity: usize) -> Self { | ||
Cuckoo { remaining: max_capacity, filter: CuckooFilter::with_capacity(max_capacity) } | ||
} | ||
} | ||
|
||
impl Filter for Cuckoo { | ||
fn add(&mut self, element: &[u8]) -> Result<(), NippyJarError> { | ||
if self.remaining == 0 { | ||
return Err(NippyJarError::FilterMaxCapacity) | ||
} | ||
|
||
self.remaining -= 1; | ||
|
||
Ok(self.filter.add(element)?) | ||
} | ||
|
||
fn contains(&self, element: &[u8]) -> Result<bool, NippyJarError> { | ||
Ok(self.filter.contains(element)) | ||
} | ||
} | ||
|
||
impl std::fmt::Debug for Cuckoo { | ||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | ||
f.debug_struct("Cuckoo") | ||
.field("remaining", &self.remaining) | ||
.field("filter_size", &self.filter.memory_usage()) | ||
.finish_non_exhaustive() | ||
} | ||
} | ||
|
||
impl PartialEq for Cuckoo { | ||
fn eq(&self, _other: &Self) -> bool { | ||
self.remaining == _other.remaining && | ||
{ | ||
#[cfg(not(test))] | ||
{ | ||
unimplemented!("No way to figure it out without exporting (expensive), so only allow direct comparison on a test") | ||
} | ||
#[cfg(test)] | ||
{ | ||
let f1 = self.filter.export(); | ||
let f2 = _other.filter.export(); | ||
return f1.length == f2.length && f1.values == f2.values | ||
} | ||
} | ||
} | ||
} | ||
|
||
impl<'de> Deserialize<'de> for Cuckoo { | ||
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> | ||
where | ||
D: Deserializer<'de>, | ||
{ | ||
let (remaining, exported): (usize, ExportedCuckooFilter) = | ||
Deserialize::deserialize(deserializer)?; | ||
|
||
Ok(Cuckoo { remaining, filter: exported.into() }) | ||
} | ||
} | ||
|
||
impl Serialize for Cuckoo { | ||
/// Potentially expensive, but should be used only when creating the file. | ||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> | ||
where | ||
S: Serializer, | ||
{ | ||
(self.remaining, self.filter.export()).serialize(serializer) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
use crate::NippyJarError; | ||
use serde::{Deserialize, Serialize}; | ||
|
||
mod cuckoo; | ||
pub use cuckoo::Cuckoo; | ||
|
||
pub trait Filter { | ||
/// Add element to the inclusion list. | ||
fn add(&mut self, element: &[u8]) -> Result<(), NippyJarError>; | ||
|
||
/// Checks if the element belongs to the inclusion list. There might be false positives. | ||
fn contains(&self, element: &[u8]) -> Result<bool, NippyJarError>; | ||
} | ||
|
||
#[derive(Debug, Serialize, Deserialize, PartialEq)] | ||
pub enum Filters { | ||
Cuckoo(Cuckoo), | ||
// Avoids irrefutable let errors. Remove this after adding another one. | ||
Unused, | ||
} | ||
|
||
impl Filter for Filters { | ||
fn add(&mut self, element: &[u8]) -> Result<(), NippyJarError> { | ||
match self { | ||
Filters::Cuckoo(c) => c.add(element), | ||
Filters::Unused => todo!(), | ||
} | ||
} | ||
|
||
fn contains(&self, element: &[u8]) -> Result<bool, NippyJarError> { | ||
match self { | ||
Filters::Cuckoo(c) => c.contains(element), | ||
Filters::Unused => todo!(), | ||
} | ||
} | ||
} |
Oops, something went wrong.