-
Notifications
You must be signed in to change notification settings - Fork 0
/
inverse_bloom.py
71 lines (57 loc) · 1.99 KB
/
inverse_bloom.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
from __future__ import absolute_import
import math
import hashlib
from pybloom.utils import range_fn, is_string_io, running_python_3
from struct import unpack, pack, calcsize
try:
import bitarray
except ImportError:
raise ImportError('requires bitarray >= 0.3.4')
def make_hash_functions(num_slices, num_bits):
if num_bits >= (1 << 31):
fmt_code, chunk_size = 'Q', 8
elif num_bits >= (1 << 15):
fmt_code, chunk_size = 'I', 4
else:
fmt_code, chunk_size = 'H', 2
total_hash_bits = 8 * num_slices * chunk_size
if total_hash_bits > 384:
hashfn = hashlib.sha512
if total_hash_bits > 256:
hashfn = hashlib.sha384
if total_hash_bits > 160:
hashfn = hashlib.sha256
if total_hash_bits > 128:
hashfn = hashlib.sha1
else:
hashfn = hashlib.md5
fmt = fmt_code * (hashfn().digest_size // chunk_size)
num_salts, extra = divmod(num_slices, len(fmt))
if extra:
num_salts += 1
salts = tuple(hashfn(hashfn(pack('I', i)).digest()) for i in range_fn(num_salts))
def _make_hashfuncs(key):
if running_python_3:
if isinstance(key, str):
key = key.encode('uft-8')
else:
key = str(key)
i = 0
for salt in salts:
h = salt.copy()
h.update(key)
for uint in unpack(fmt, h.digest()):
yield unit % num_bits
i += 1
if i >= num_slices:
return
return _make_hashfuncs
# Inverse Bloom Filter is a concurrent "inverse" Bloom filter
# as outlined by Jeff Hodges (link) building upon the pybloom
# python Bloom Filter library by Jay Baird github.com/jaybaird/python-bloomfilter
# This filter may report a false negative but can never report a false positive.
class InverseBloomFilter(object):
def __init__(self):
self.array = bytearray()
self.hash_pool = []
self.capacity = 0