Skip to content

Commit

Permalink
util/bloom: add a bloom filter implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
time-and-fate committed Oct 29, 2019
1 parent ec66920 commit 564ed84
Show file tree
Hide file tree
Showing 2 changed files with 197 additions and 0 deletions.
68 changes: 68 additions & 0 deletions util/bloom/bloomfilter.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
package bloom

import (
"fmt"
"hash/fnv"
)

// Filter is a minimal Bloom-filter-style bit set: each key is mapped by a
// single hash function to exactly one bit.
type Filter struct {
	// bitSet holds the bits, packed into 64-bit words.
	bitSet []uint64
	// length is the total number of addressable bits; unitSize is the
	// number of bits stored in each bitSet word.
	length, unitSize uint64
}

// NewFilter allocates a zeroed Filter backed by length 64-bit words, i.e.
// with 64*length addressable bits. It returns a nil Filter and an error when
// length is not positive.
func NewFilter(length int) (*Filter, error) {
	if length < 1 {
		return nil, fmt.Errorf("length is not positive")
	}

	const wordBits uint64 = 64
	f := &Filter{
		bitSet:   make([]uint64, length),
		length:   wordBits * uint64(length),
		unitSize: wordBits,
	}
	return f, nil
}

// NewFilterBySlice wraps an existing slice of 64-bit words as a Filter with
// 64*len(bs) addressable bits. The slice is used directly rather than
// copied, so bits already set in bs are visible to Probe and later Insert
// calls write into bs. It returns a nil Filter and an error when bs is empty.
func NewFilterBySlice(bs []uint64) (*Filter, error) {
	words := len(bs)
	if words == 0 {
		return nil, fmt.Errorf("len(bs) == 0")
	}

	const wordBits uint64 = 64
	f := &Filter{
		bitSet:   bs,
		length:   wordBits * uint64(words),
		unitSize: wordBits,
	}
	return f, nil
}

// Insert records key in the filter by setting the single bit that the
// filter's hash method assigns to it.
func (bf *Filter) Insert(key []byte) {
	word, bit := bf.hash(key)
	mask := uint64(1) << bit
	bf.bitSet[word] |= mask
}

// Probe reports whether the bit assigned to key by the filter's hash method
// is set. A true result can be a false positive, because distinct keys may
// map to the same bit.
func (bf *Filter) Probe(key []byte) bool {
	word, bit := bf.hash(key)
	mask := uint64(1) << bit
	return bf.bitSet[word]&mask != 0
}

// hash maps key to a bit position in the filter and returns that position
// as (index of the word in bitSet, bit offset inside that word).
func (bf *Filter) hash(key []byte) (uint64, uint64) {
	// Reduce the 64-bit key hash to a bit number in [0, bf.length).
	pos := ihash(key) % bf.length
	return pos / bf.unitSize, pos % bf.unitSize
}

// ihash returns the 64-bit FNV-1a hash of key.
func ihash(key []byte) uint64 {
	hasher := fnv.New64a()
	// Write on a hash.Hash never returns an error, so its results are
	// deliberately discarded.
	_, _ = hasher.Write(key)
	return hasher.Sum64()
}
129 changes: 129 additions & 0 deletions util/bloom/bloomfilter_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
package bloom

import (
"testing"

"github.com/pingcap/check"
)

// TestT is the go test entry point for this package's check-based tests; it
// passes the *testing.T to check.TestingT.
// NOTE(review): per the gocheck API, TestingT runs the suites registered via
// check.Suite — confirm the pingcap/check fork behaves the same.
func TestT(t *testing.T) {
check.TestingT(t)
}

// Register testBloomFilterSuite with the check package at package
// initialization; the return value of check.Suite is not needed.
var _ = check.Suite(&testBloomFilterSuite{})

// testBloomFilterSuite is the stateless receiver for the Filter test methods
// in this file.
type testBloomFilterSuite struct{}

// TestNewBloomFilter checks that NewFilter rejects a zero length and accepts
// a positive one.
func (s *testBloomFilterSuite) TestNewBloomFilter(c *check.C) {
	_, err := NewFilter(0)
	c.Assert(err, check.NotNil)

	_, err = NewFilter(10)
	c.Assert(err, check.IsNil)
}

// TestNewBloomFilterBySlice checks that NewFilterBySlice rejects an empty
// slice and accepts a non-empty one.
func (s *testBloomFilterSuite) TestNewBloomFilterBySlice(c *check.C) {
	_, err := NewFilterBySlice([]uint64{})
	c.Assert(err, check.NotNil)

	_, err = NewFilterBySlice(make([]uint64, 10))
	c.Assert(err, check.IsNil)
}

// TestBasic inserts a fixed list of words into a 10-word filter and checks
// that every inserted word probes as present, while a few words that were
// never inserted probe as absent.
func (s *testBloomFilterSuite) TestBasic(c *check.C) {
	// Fail with a clear assertion if construction breaks, instead of
	// panicking on a nil filter in the first Insert.
	bf, err := NewFilterBySlice(make([]uint64, 10))
	c.Assert(err, check.IsNil)

	// The list keeps the exact keys (duplicates included) of the original
	// sequence of Insert calls.
	// NOTE(review): "check.IsTrue" looks like a search-and-replace artifact
	// for "true"; it is kept verbatim because the negative probes below
	// depend on exactly which bits end up set.
	inserted := []string{
		"Heading", "towards", "the", "ocean", "blue",
		"Reaching", "for", "the", "stars",
		"it's", "every", "effort", "of", "yours",
		"Making", "our", "dream", "come", "check.IsTrue",
		"Let's", "shape", "the", "future", "of", "database", "together",
	}
	for _, word := range inserted {
		bf.Insert([]byte(word))
	}
	for _, word := range inserted {
		c.Assert(bf.Probe([]byte(word)), check.IsTrue, check.Commentf("inserted word %q", word))
	}

	// These expectations hold only because the hash is deterministic and
	// none of these keys happens to share a bit with an inserted key; a
	// filter like this can in general report false positives.
	absent := []string{"shit", "fuck", "foo", "bar"}
	for _, word := range absent {
		c.Assert(bf.Probe([]byte(word)), check.IsFalse, check.Commentf("absent word %q", word))
	}
}

// BenchmarkBloomInsert measures Filter.Insert for five short keys per
// iteration. The filter and the key byte slices are built once, before the
// timer is reset, so the timed loop covers only the Insert calls and not
// slice allocation or filter construction.
func BenchmarkBloomInsert(b *testing.B) {
	keys := [][]byte{
		[]byte("Heading"),
		[]byte("towards"),
		[]byte("the"),
		[]byte("ocean"),
		[]byte("blue"),
	}
	bf, err := NewFilterBySlice(make([]uint64, 10))
	if err != nil {
		b.Fatal(err)
	}

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		for _, key := range keys {
			bf.Insert(key)
		}
	}
}

// BenchmarkBloom measures Filter.Probe for five keys that were all inserted
// beforehand. Setup (filter construction, key conversion, inserts) happens
// before the timer is reset so only the Probe calls are timed.
func BenchmarkBloom(b *testing.B) {
	keys := [][]byte{
		[]byte("Heading"),
		[]byte("towards"),
		[]byte("the"),
		[]byte("ocean"),
		[]byte("blue"),
	}
	bf, err := NewFilterBySlice(make([]uint64, 10))
	if err != nil {
		b.Fatal(err)
	}
	for _, key := range keys {
		bf.Insert(key)
	}

	// Count hits so the Probe results are actually consumed and can be
	// validated after the loop.
	hits := 0
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		for _, key := range keys {
			if bf.Probe(key) {
				hits++
			}
		}
	}
	b.StopTimer()

	// Every probed key was inserted above, so each Probe must report a hit.
	if want := b.N * len(keys); hits != want {
		b.Fatalf("got %d hits, want %d", hits, want)
	}
}

0 comments on commit 564ed84

Please sign in to comment.