From f8cedef1e7037f467509653ad6bf2d941834f27c Mon Sep 17 00:00:00 2001
From: Elad Gabay
Date: Sun, 13 Aug 2023 20:14:37 +0300
Subject: [PATCH] Enhance Support for Larger Datasets and Buckets in Encoding

This commit improves encoding by supporting numbers of items and buckets
that exceed max(uint32). Formerly the encoding used uint32 for counts,
although the filter structure already supported larger values through uint.
Until now the filter only partially supported larger datasets: not all
buckets were utilized (note the changes in `generateIndexTagHash`,
`altIndex`, and `indexHash`). Now all references to bucket indices and item
counts explicitly use uint64, and a new encoding format accommodates the
larger filters. To distinguish the legacy format (up to max(uint32) items)
from the new one, a prefix marker is introduced. Decoding seamlessly
supports both formats, and the Encode method takes a legacy boolean
parameter to allow gradual adoption.
---
 cuckoofilter.go      | 182 +++++++++++++++++++++++++++++++------------
 cuckoofilter_test.go | 135 ++++++++++++++++----------------
 packedtable.go       |  98 ++++++++++++++---------
 singletable.go       | 106 +++++++++++++++----------
 util.go              |  14 ++--
 5 files changed, 337 insertions(+), 198 deletions(-)

diff --git a/cuckoofilter.go b/cuckoofilter.go
index b03092f..65fc1b8 100644
--- a/cuckoofilter.go
+++ b/cuckoofilter.go
@@ -11,36 +11,39 @@ import (
 	"errors"
 	"fmt"
 	"io"
+	"math"
 
 	"github.com/dgryski/go-metro"
 )
 
 // maximum number of cuckoo kicks before claiming failure
-const kMaxCuckooCount uint = 500
+const kMaxCuckooCount uint64 = 500
+
+type TableType uint32
 
 const (
 	// TableTypeSingle normal single table
-	TableTypeSingle = 0
+	TableTypeSingle TableType = 0
 	// TableTypePacked packed table, use semi-sort to save 1 bit per item
-	TableTypePacked = 1
+	TableTypePacked TableType = 1
 )
 
 type table interface {
-	Init(tagsPerBucket, bitsPerTag, num uint, initialBucketsHint []byte) error
-	NumBuckets() uint
-	FindTagInBuckets(i1, i2 uint, tag uint32) bool
-	DeleteTagFromBucket(i uint, tag uint32) bool
-	InsertTagToBucket(i uint, tag uint32, kickOut bool, oldTag *uint32) bool
-	SizeInTags() uint
-	SizeInBytes() uint
+	Init(tagsPerBucket, bitsPerTag uint, num uint64, initialBucketsHint []byte) error
+	NumBuckets() uint64
+	FindTagInBuckets(i1, i2 uint64, tag uint32) bool
+	DeleteTagFromBucket(i uint64, tag uint32) bool
+	InsertTagToBucket(i uint64, tag uint32, kickOut bool, oldTag *uint32) bool
+	SizeInTags() uint64
+	SizeInBytes() uint64
 	Info() string
 	BitsPerItem() uint
-	Reader() (io.Reader, uint)
-	Decode([]byte) error
+	Reader(legacy bool) (io.Reader, uint64)
+	Decode(legacy bool, _ []byte) error
 	Reset()
 }
 
-func getTable(tableType uint) interface{} {
+func getTable(tableType TableType) interface{} {
 	switch tableType {
 	case TableTypePacked:
 		return NewPackedTable()
@@ -50,30 +53,28 @@ func getTable(tableType uint) interface{} {
 }
 
 type victimCache struct {
-	index uint
+	index uint64
 	tag   uint32
 	used  bool
 }
 
-const filterMetadataSize = 3*bytesPerUint32 + 1
-
 // Filter cuckoo filter type struct
 type Filter struct {
 	victim   victimCache
-	numItems uint
+	numItems uint64
 	table    table
 }
 
-//NewFilter return a new initialized filter
+// NewFilter return a new initialized filter
 /*
 	tagsPerBucket: num of tags for each bucket, which is b in paper. tag is fingerprint, which is f in paper.
-	bitPerItem: num of bits for each item, which is length of tag(fingerprint)
+	bitPerItem: num of bits for each item, which is length of tag(fingerprint) (max is 32)
 	maxNumKeys: num of keys that filter will store. this value should close to and lower
 	nextPow2(maxNumKeys/tagsPerBucket) * maxLoadFactor. cause table.NumBuckets is always a power of two
 */
-func NewFilter(tagsPerBucket, bitsPerItem, maxNumKeys, tableType uint) *Filter {
-	numBuckets := getNextPow2(uint64(maxNumKeys / tagsPerBucket))
-	if float64(maxNumKeys)/float64(numBuckets*tagsPerBucket) > maxLoadFactor(tagsPerBucket) {
+func NewFilter(tagsPerBucket, bitsPerItem uint, maxNumKeys uint64, tableType TableType) *Filter {
+	numBuckets := getNextPow2(maxNumKeys / uint64(tagsPerBucket))
+	if float64(maxNumKeys)/float64(numBuckets*uint64(tagsPerBucket)) > maxLoadFactor(tagsPerBucket) {
 		numBuckets <<= 1
 	}
 	if numBuckets == 0 {
@@ -86,30 +87,46 @@ func NewFilter(tagsPerBucket, bitsPerItem, maxNumKeys, tableType uint) *Filter {
 	}
 }
 
-func (f *Filter) indexHash(hv uint32) uint {
+func (f *Filter) indexHash(hv uint64) uint64 {
 	// table.NumBuckets is always a power of two, so modulo can be replaced with bitwise-and:
-	return uint(hv) & (f.table.NumBuckets() - 1)
+	return hv & (f.table.NumBuckets() - 1)
 }
 
-func (f *Filter) tagHash(hv uint32) uint32 {
-	return hv%((1<<f.table.BitsPerItem())-1) + 1
+func (f *Filter) tagHash(hv uint64) uint32 {
+	return uint32(hv)%((1<<f.table.BitsPerItem())-1) + 1
 }
 
-func (f *Filter) generateIndexTagHash(item []byte) (index uint, tag uint32) {
-	hash := metro.Hash64(item, 1337)
-	index = f.indexHash(uint32(hash >> 32))
-	tag = f.tagHash(uint32(hash))
+func (f *Filter) generateIndexTagHash(item []byte) (index uint64, tag uint32) {
+	// For backward compatibility with existing filters before adding the support of more than max uint32 items,
+	// we need to use the old hash function.
+	if f.table.SizeInTags()-1 <= math.MaxUint32 {
+		hash := metro.Hash64(item, 1337)
+		index = f.indexHash(hash >> 32)
+		tag = f.tagHash(hash)
+		return
+	}
+
+	hash1, hash2 := metro.Hash128(item, 1337)
+	index = f.indexHash(hash1)
+	tag = f.tagHash(hash2)
 	return
 }
 
-func (f *Filter) altIndex(index uint, tag uint32) uint {
-	// 0x5bd1e995 is the hash constant from MurmurHash2
-	return f.indexHash(uint32(index) ^ (tag * 0x5bd1e995))
+func (f *Filter) altIndex(index uint64, tag uint32) uint64 {
+	// For backward compatibility with existing filters before adding the support of more than max uint32 items,
+	// we need to use the old hash function.
+	if f.table.SizeInTags()-1 <= math.MaxUint32 {
+		// 0x5bd1e995 is the hash constant from MurmurHash2.
+		return f.indexHash(uint64(uint32(index) ^ (tag * 0x5bd1e995)))
+	}
+
+	// 0xc6a4a7935bd1e995 is the hash constant from MurmurHash64A.
+ return f.indexHash(index ^ (uint64(tag) * 0xc6a4a7935bd1e995)) } // Size return num of items that filter store -func (f *Filter) Size() uint { - var c uint +func (f *Filter) Size() uint64 { + var c uint64 if f.victim.used { c = 1 } @@ -122,7 +139,7 @@ func (f *Filter) LoadFactor() float64 { } // SizeInBytes return bytes occupancy of filter's table -func (f *Filter) SizeInBytes() uint { +func (f *Filter) SizeInBytes() uint64 { return f.table.SizeInBytes() } @@ -148,12 +165,12 @@ func (f *Filter) AddUnique(item []byte) bool { return f.Add(item) } -func (f *Filter) addImpl(i uint, tag uint32) bool { +func (f *Filter) addImpl(i uint64, tag uint32) bool { curIndex := i curTag := tag var oldTag uint32 - var count uint + var count uint64 var kickOut bool for count = 0; count < kMaxCuckooCount; count++ { kickOut = count > 0 @@ -255,8 +272,8 @@ func (f *Filter) Info() string { } // Encode returns a byte slice representing a Cuckoo filter -func (f *Filter) Encode() ([]byte, error) { - filterReader, filterSize := f.EncodeReader() +func (f *Filter) Encode(legacy bool) ([]byte, error) { + filterReader, filterSize := f.EncodeReader(legacy) buf := make([]byte, filterSize) if _, err := io.ReadFull(filterReader, buf); err != nil { return nil, err @@ -264,10 +281,48 @@ func (f *Filter) Encode() ([]byte, error) { return buf, nil } +const ( + // uint32(numItems), uint32(victim.index), uint32(victim.tag), byte(victim.used) + filterMetadataSizeLegacy = 3*bytesPerUint32 + 1 + + // uint64(numItems), uint64(victim.index), uint32(victim.tag), byte(victim.used) + filterMetadataSize = 2*bytesPerUint64 + bytesPerUint32 + 1 +) + +// In the legacy serialization format, there are 3 uint32s and then a byte for "victimUsed" which is a boolean 0 or 1. +// We need a way to distinguish between the legacy format and the new format, so we can use that byte as a marker +// (0/1 means the legacy format, other value means the new format). +// In the new format the first 13 bytes are not used, just markers, the actual serialization starts after the marker. +// The marker is hex of "IMNOTLEGACY!!" :). +var newFormatMarker = [filterMetadataSizeLegacy]byte{0x49, 0x4D, 0x4E, 0x4F, 0x54, 0x4C, 0x45, 0x47, 0x41, 0x43, 0x59, 0x21, 0x21} + // EncodeReader returns a reader representing a Cuckoo filter -func (f *Filter) EncodeReader() (io.Reader, uint) { +func (f *Filter) EncodeReader(legacy bool) (io.Reader, uint64) { + if legacy { + return f.encodeReaderLegacyMaxUint32() + } + var metadata [filterMetadataSize]byte + for i, n := range []uint64{f.numItems, f.victim.index} { + binary.LittleEndian.PutUint64(metadata[i*bytesPerUint64:], n) + } + + binary.LittleEndian.PutUint32(metadata[2*bytesPerUint64:], f.victim.tag) + + victimUsed := byte(0) + if f.victim.used { + victimUsed = byte(1) + } + metadata[2*bytesPerUint64+bytesPerUint32] = victimUsed + tableReader, tableEncodedSize := f.table.Reader(false) + return io.MultiReader(bytes.NewReader(newFormatMarker[:]), bytes.NewReader(metadata[:]), tableReader), uint64(len(newFormatMarker)) + uint64(len(metadata)) + tableEncodedSize +} + +// encodeReaderLegacyMaxUint32 returns a reader representing a Cuckoo filter encoded in the legacy mode that supports up to max(uint32) items. 
+func (f *Filter) encodeReaderLegacyMaxUint32() (io.Reader, uint64) { + var metadata [filterMetadataSizeLegacy]byte + for i, n := range []uint32{uint32(f.numItems), uint32(f.victim.index), f.victim.tag} { binary.LittleEndian.PutUint32(metadata[i*bytesPerUint32:], n) } @@ -277,8 +332,8 @@ func (f *Filter) EncodeReader() (io.Reader, uint) { victimUsed = byte(1) } metadata[bytesPerUint32*3] = victimUsed - tableReader, tableEncodedSize := f.table.Reader() - return io.MultiReader(bytes.NewReader(metadata[:]), tableReader), uint(len(metadata)) + tableEncodedSize + tableReader, tableEncodedSize := f.table.Reader(true) + return io.MultiReader(bytes.NewReader(metadata[:]), tableReader), uint64(len(metadata)) + tableEncodedSize } // Decode returns a Cuckoo Filter using a copy of the provided byte slice. @@ -293,13 +348,42 @@ func DecodeFrom(b []byte) (*Filter, error) { if len(b) < 20 { return nil, errors.New("unexpected bytes length") } - numItems := uint(binary.LittleEndian.Uint32(b[0*bytesPerUint32:])) - curIndex := uint(binary.LittleEndian.Uint32(b[1*bytesPerUint32:])) - curTag := binary.LittleEndian.Uint32(b[2*1*bytesPerUint32:]) - used := b[12] == byte(1) - tableType := uint(b[13]) + + curOffset := uint64(0) + legacy := uint(b[len(newFormatMarker)-1]) <= 1 + + // Skip the marker if it's the new format. + if !legacy { + curOffset += uint64(len(newFormatMarker)) + } + + var numItems uint64 + if legacy { + numItems = uint64(binary.LittleEndian.Uint32(b[curOffset:])) + curOffset += bytesPerUint32 + } else { + numItems = binary.LittleEndian.Uint64(b[curOffset:]) + curOffset += bytesPerUint64 + } + + var curIndex uint64 + if legacy { + curIndex = uint64(binary.LittleEndian.Uint32(b[curOffset:])) + curOffset += bytesPerUint32 + } else { + curIndex = binary.LittleEndian.Uint64(b[curOffset:]) + curOffset += bytesPerUint64 + } + + curTag := binary.LittleEndian.Uint32(b[curOffset:]) + curOffset += bytesPerUint32 + + used := b[curOffset] == byte(1) + + tableOffset := curOffset + 1 + tableType := TableType(b[tableOffset]) table := getTable(tableType).(table) - if err := table.Decode(b[13:]); err != nil { + if err := table.Decode(legacy, b[tableOffset:]); err != nil { return nil, err } return &Filter{ diff --git a/cuckoofilter_test.go b/cuckoofilter_test.go index b94a317..d2b2c33 100644 --- a/cuckoofilter_test.go +++ b/cuckoofilter_test.go @@ -19,7 +19,8 @@ const size = 100000 var ( testBucketSize = []uint{2, 4, 8} testFingerprintSize = []uint{2, 4, 5, 6, 7, 8, 9, 10, 12, 13, 16, 17, 23, 31, 32} - testTableType = []uint{TableTypeSingle, TableTypePacked} + encodeLegacy = []bool{true, false} + testTableType = []TableType{TableTypeSingle, TableTypePacked} ) func TestFilter(t *testing.T) { @@ -29,82 +30,84 @@ func TestFilter(t *testing.T) { for _, b := range testBucketSize { for _, f := range testFingerprintSize { for _, table := range testTableType { - if f == 2 && table == TableTypePacked { - continue - } - if table == TableTypePacked && b != 4 { - continue - } - cf := NewFilter(b, f, 8190, table) - // fmt.Println(cf.Info()) - a := make([][]byte, 0) - for i := uint(0); i < insertNum; i++ { - _, _ = io.ReadFull(rand.Reader, hash[:]) - if cf.AddUnique(hash[:]) { - tmp := make([]byte, 32) - copy(tmp, hash[:]) - a = append(a, tmp) + for _, legacy := range encodeLegacy { + if f == 2 && table == TableTypePacked { + continue + } + if table == TableTypePacked && b != 4 { + continue + } + cf := NewFilter(b, f, 8190, table) + // fmt.Println(cf.Info()) + a := make([][]byte, 0) + for i := uint(0); i < insertNum; i++ { + 
_, _ = io.ReadFull(rand.Reader, hash[:]) + if cf.AddUnique(hash[:]) { + tmp := make([]byte, 32) + copy(tmp, hash[:]) + a = append(a, tmp) + } } - } - count := cf.Size() - if count != uint(len(a)) { - t.Errorf("Expected count = %d, instead count = %d, b %v f %v", uint(len(a)), count, b, f) - return - } + count := cf.Size() + if count != uint64(len(a)) { + t.Errorf("Expected count = %d, instead count = %d, b %v f %v", uint(len(a)), count, b, f) + return + } - encodedBytes, err := cf.Encode() - if err != nil { - t.Fatalf("err %v", err) - } - if len(encodedBytes) != cap(encodedBytes) { - t.Fatalf("len(%d) != cap(%d)", len(encodedBytes), cap(encodedBytes)) - } - ncf, err := Decode(encodedBytes) - if err != nil || !reflect.DeepEqual(cf, ncf) { - t.Errorf("Expected epual, err %v", err) - return - } + encodedBytes, err := cf.Encode(legacy) + if err != nil { + t.Fatalf("err %v", err) + } + if len(encodedBytes) != cap(encodedBytes) { + t.Fatalf("len(%d) != cap(%d)", len(encodedBytes), cap(encodedBytes)) + } + ncf, err := Decode(encodedBytes) + if err != nil || !reflect.DeepEqual(cf, ncf) { + t.Errorf("Expected epual, err %v", err) + return + } - encodedBytes, err = cf.Encode() - if err != nil { - t.Fatalf("err %v", err) - } - ncf, err = DecodeFrom(encodedBytes) - if err != nil || !reflect.DeepEqual(cf, ncf) { - t.Errorf("Expected epual, err %v", err) - return - } + encodedBytes, err = cf.Encode(legacy) + if err != nil { + t.Fatalf("err %v", err) + } + ncf, err = DecodeFrom(encodedBytes) + if err != nil || !reflect.DeepEqual(cf, ncf) { + t.Errorf("Expected epual, err %v", err) + return + } - filterReader, _ := cf.EncodeReader() - bytesFromReader, err := io.ReadAll(filterReader) - if err != nil { - t.Fatalf("Error reading from reader") - } - if !bytes.Equal(bytesFromReader, encodedBytes) { - t.Fatalf("Expected to be equal") - } + filterReader, _ := cf.EncodeReader(legacy) + bytesFromReader, err := io.ReadAll(filterReader) + if err != nil { + t.Fatalf("Error reading from reader") + } + if !bytes.Equal(bytesFromReader, encodedBytes) { + t.Fatalf("Expected to be equal") + } - fmt.Println(cf.Info()) - cf.BitsPerItem() - cf.SizeInBytes() - cf.LoadFactor() + fmt.Println(cf.Info()) + cf.BitsPerItem() + cf.SizeInBytes() + cf.LoadFactor() + + for _, v := range a { + if !cf.Contain(v) { + t.Errorf("Expected contain, instead not contain, b %v f %v table type %v", b, f, table) + return + } + cf.Delete(v) + } - for _, v := range a { - if !cf.Contain(v) { - t.Errorf("Expected contain, instead not contain, b %v f %v table type %v", b, f, table) + count = cf.Size() + if count != 0 { + t.Errorf("Expected count = 0, instead count == %d, b %v f %v table type %v", count, b, f, table) return } - cf.Delete(v) - } - count = cf.Size() - if count != 0 { - t.Errorf("Expected count = 0, instead count == %d, b %v f %v table type %v", count, b, f, table) - return + fmt.Printf("Filter bucketSize %v fingerprintSize %v tableType %v falsePositive Rate %v \n", b, f, table, cf.FalsePositiveRate()) } - - fmt.Printf("Filter bucketSize %v fingerprintSize %v tableType %v falsePositive Rate %v \n", b, f, table, cf.FalsePositiveRate()) } } } diff --git a/packedtable.go b/packedtable.go index dd2ea34..076d03b 100644 --- a/packedtable.go +++ b/packedtable.go @@ -21,10 +21,11 @@ type PackedTable struct { kDirBitsMask uint32 bitsPerTag uint - len uint - numBuckets uint + len uint64 + numBuckets uint64 buckets []byte perm PermEncoding + totalSlots uint64 } // NewPackedTable return a packedTable @@ -39,7 +40,7 @@ const ( ) // Init init table -func 
(p *PackedTable) Init(_, bitsPerTag, num uint, initialBucketsHint []byte) error { +func (p *PackedTable) Init(_, bitsPerTag uint, num uint64, initialBucketsHint []byte) error { p.bitsPerTag = bitsPerTag p.numBuckets = num @@ -48,28 +49,29 @@ func (p *PackedTable) Init(_, bitsPerTag, num uint, initialBucketsHint []byte) e p.kBytesPerBucket = (p.kBitsPerBucket + 7) >> 3 p.kDirBitsMask = ((1 << p.kDirBitsPerTag) - 1) << cFpSize // NOTE: use 7 extra bytes to avoid overrun as we always read a uint64 - p.len = (p.kBitsPerBucket*p.numBuckets+7)>>3 + 7 + p.len = (uint64(p.kBitsPerBucket)*p.numBuckets+7)>>3 + 7 buckets, err := getBucketsFromHint(initialBucketsHint, p.len) if err != nil { return err } p.buckets = buckets p.perm.Init() + p.totalSlots = tagsPerPTable * p.numBuckets return nil } // NumBuckets return num of table buckets -func (p *PackedTable) NumBuckets() uint { +func (p *PackedTable) NumBuckets() uint64 { return p.numBuckets } // SizeInTags return num of tags that table can store -func (p *PackedTable) SizeInTags() uint { - return tagsPerPTable * p.numBuckets +func (p *PackedTable) SizeInTags() uint64 { + return p.totalSlots } // SizeInBytes return bytes occupancy of table -func (p *PackedTable) SizeInBytes() uint { +func (p *PackedTable) SizeInBytes() uint64 { return p.len } @@ -79,9 +81,9 @@ func (p *PackedTable) BitsPerItem() uint { } // PrintBucket print a bucket -func (p *PackedTable) PrintBucket(i uint) { - pos := p.kBitsPerBucket * i / bitsPerByte - fmt.Printf("\tbucketBits =%x\n", p.buckets[pos:pos+p.kBytesPerBucket]) +func (p *PackedTable) PrintBucket(i uint64) { + pos := uint64(p.kBitsPerBucket) * i / bitsPerByte + fmt.Printf("\tbucketBits =%x\n", p.buckets[pos:pos+uint64(p.kBytesPerBucket)]) var tags [tagsPerPTable]uint32 p.ReadBucket(i, &tags) p.PrintTags(tags) @@ -118,10 +120,10 @@ func (p *PackedTable) sortTags(tags *[tagsPerPTable]uint32) { // ReadBucket read and decode the bucket i, pass the 4 decoded tags to the 2nd arg // bucket bits = 12 codeword bits + dir bits of tag1 + dir bits of tag2 ... 
-func (p *PackedTable) ReadBucket(i uint, tags *[tagsPerPTable]uint32) { +func (p *PackedTable) ReadBucket(i uint64, tags *[tagsPerPTable]uint32) { var codeword uint16 var lowBits [tagsPerPTable]uint8 - pos := i * p.kBitsPerBucket >> 3 + pos := i * uint64(p.kBitsPerBucket) >> 3 switch p.bitsPerTag { case 5: // 1 dirBits per tag, 16 bits per bucket @@ -205,13 +207,13 @@ func (p *PackedTable) ReadBucket(i uint, tags *[tagsPerPTable]uint32) { tags[3] |= uint32(lowBits[3]) } -func (p *PackedTable) readOutBytes(i, pos uint) (uint64, uint64, uint) { - rShift := (p.kBitsPerBucket * i) & (bitsPerByte - 1) +func (p *PackedTable) readOutBytes(i, pos uint64) (uint64, uint64, uint64) { + rShift := (uint64(p.kBitsPerBucket) * i) & (bitsPerByte - 1) // tag is max 32bit, store 31bit per tag, so max occupies 16 bytes - kBytes := (rShift + p.kBitsPerBucket + 7) / bitsPerByte + kBytes := (rShift + uint64(p.kBitsPerBucket) + 7) / bitsPerByte var u1, u2 uint64 - for k := uint(0); k < kBytes; k++ { + for k := uint64(0); k < kBytes; k++ { if k < bytesPerUint64 { u1 |= uint64(p.buckets[pos+k]) << (k * bitsPerByte) } else { @@ -223,7 +225,7 @@ func (p *PackedTable) readOutBytes(i, pos uint) (uint64, uint64, uint) { } // WriteBucket write tags into bucket i -func (p *PackedTable) WriteBucket(i uint, tags [tagsPerPTable]uint32) { +func (p *PackedTable) WriteBucket(i uint64, tags [tagsPerPTable]uint32) { p.sortTags(&tags) /* put in direct bits for each tag*/ @@ -242,7 +244,7 @@ func (p *PackedTable) WriteBucket(i uint, tags [tagsPerPTable]uint32) { // note that : tags[j] = lowBits[j] | highBits[j] codeword := p.perm.Encode(lowBits) - pos := i * p.kBitsPerBucket >> 3 + pos := i * uint64(p.kBitsPerBucket) >> 3 switch p.kBitsPerBucket { case 16: // 1 dirBits per tag @@ -342,11 +344,11 @@ func (p *PackedTable) WriteBucket(i uint, tags [tagsPerPTable]uint32) { } } -func (p *PackedTable) writeInBytes(i, pos uint, codeword uint16, highBits [tagsPerPTable]uint32) { - rShift := (p.kBitsPerBucket * i) & (bitsPerByte - 1) - lShift := (rShift + p.kBitsPerBucket) & (bitsPerByte - 1) +func (p *PackedTable) writeInBytes(i, pos uint64, codeword uint16, highBits [tagsPerPTable]uint32) { + rShift := (uint64(p.kBitsPerBucket) * i) & (bitsPerByte - 1) + lShift := (rShift + uint64(p.kBitsPerBucket)) & (bitsPerByte - 1) // tag is max 32bit, store 31bit per tag, so max occupies 16 bytes - kBytes := (rShift + p.kBitsPerBucket + 7) / bitsPerByte + kBytes := (rShift + uint64(p.kBitsPerBucket) + 7) / bitsPerByte rMask := uint8(0xff) >> (bitsPerByte - rShift) lMask := uint8(0xff) << lShift @@ -374,7 +376,7 @@ func (p *PackedTable) writeInBytes(i, pos uint, codeword uint16, highBits [tagsP } } - for k := uint(0); k < kBytes; k++ { + for k := uint64(0); k < kBytes; k++ { if k < bytesPerUint64 { p.buckets[pos+k] = byte(u1 >> (k * bitsPerByte)) } else { @@ -386,7 +388,7 @@ func (p *PackedTable) writeInBytes(i, pos uint, codeword uint16, highBits [tagsP } // FindTagInBuckets find if tag in bucket i1 i2 -func (p *PackedTable) FindTagInBuckets(i1, i2 uint, tag uint32) bool { +func (p *PackedTable) FindTagInBuckets(i1, i2 uint64, tag uint32) bool { var tags1, tags2 [tagsPerPTable]uint32 p.ReadBucket(i1, &tags1) p.ReadBucket(i2, &tags2) @@ -397,7 +399,7 @@ func (p *PackedTable) FindTagInBuckets(i1, i2 uint, tag uint32) bool { } // DeleteTagFromBucket delete tag from bucket i -func (p *PackedTable) DeleteTagFromBucket(i uint, tag uint32) bool { +func (p *PackedTable) DeleteTagFromBucket(i uint64, tag uint32) bool { var tags [tagsPerPTable]uint32 
p.ReadBucket(i, &tags) for j := 0; j < tagsPerPTable; j++ { @@ -411,7 +413,7 @@ func (p *PackedTable) DeleteTagFromBucket(i uint, tag uint32) bool { } // InsertTagToBucket insert tag into bucket i -func (p *PackedTable) InsertTagToBucket(i uint, tag uint32, kickOut bool, oldTag *uint32) bool { +func (p *PackedTable) InsertTagToBucket(i uint64, tag uint32, kickOut bool, oldTag *uint32) bool { var tags [tagsPerPTable]uint32 p.ReadBucket(i, &tags) for j := 0; j < tagsPerPTable; j++ { @@ -447,20 +449,44 @@ func (p *PackedTable) Info() string { p.bitsPerTag, p.kDirBitsPerTag, p.numBuckets, p.SizeInTags()) } -const packedTableMetadataSize = 2+bytesPerUint32 +const ( + packedTableMetadataSize = 2 + bytesPerUint64 + packedTableMetadataSizeLegacy = 2 + bytesPerUint32 +) + +// Reader returns a reader representing a TableBucket +func (p *PackedTable) Reader(legacy bool) (io.Reader, uint64) { + var metadata []byte + if legacy { + metadata = make([]byte, packedTableMetadataSizeLegacy) + } else { + metadata = make([]byte, packedTableMetadataSize) + } -// Encode returns a byte slice representing a TableBucket -func (p *PackedTable) Reader() (io.Reader, uint) { - var metadata [packedTableMetadataSize]byte metadata[0] = uint8(TableTypePacked) metadata[1] = uint8(p.bitsPerTag) - binary.LittleEndian.PutUint32(metadata[2:], uint32(p.numBuckets)) - return io.MultiReader(bytes.NewReader(metadata[:]), bytes.NewReader(p.buckets)), uint(len(metadata) + len(p.buckets)) + + if legacy { + binary.LittleEndian.PutUint32(metadata[2:], uint32(p.numBuckets)) + } else { + binary.LittleEndian.PutUint64(metadata[2:], p.numBuckets) + } + + return io.MultiReader(bytes.NewReader(metadata[:]), bytes.NewReader(p.buckets)), uint64(len(metadata)) + uint64(len(p.buckets)) } // Decode parse a byte slice into a TableBucket -func (p *PackedTable) Decode(b []byte) error { +func (p *PackedTable) Decode(legacy bool, b []byte) error { bitsPerTag := uint(b[1]) - numBuckets := uint(binary.LittleEndian.Uint32(b[2:])) - return p.Init(0, bitsPerTag, numBuckets, b[6:]) + + var numBuckets, offset uint64 + if legacy { + numBuckets = uint64(binary.LittleEndian.Uint32(b[2:])) + offset = packedTableMetadataSizeLegacy + } else { + numBuckets = binary.LittleEndian.Uint64(b[2:]) + offset = packedTableMetadataSize + } + + return p.Init(0, bitsPerTag, numBuckets, b[offset:]) } diff --git a/singletable.go b/singletable.go index b41c63c..b30b3cc 100644 --- a/singletable.go +++ b/singletable.go @@ -16,11 +16,12 @@ import ( // SingleTable the most naive table implementation: one huge bit array type SingleTable struct { kTagsPerBucket uint - numBuckets uint + numBuckets uint64 bitsPerTag uint tagMask uint32 bucket []byte - len uint + len uint64 + totalSlots uint64 } // NewSingleTable return a singleTable @@ -29,34 +30,35 @@ func NewSingleTable() *SingleTable { } // Init init table -func (t *SingleTable) Init(tagsPerBucket, bitsPerTag, num uint, initialBucketsHint []byte) error { +func (t *SingleTable) Init(tagsPerBucket, bitsPerTag uint, num uint64, initialBucketsHint []byte) error { t.bitsPerTag = bitsPerTag t.numBuckets = num t.kTagsPerBucket = tagsPerBucket t.tagMask = (1 << bitsPerTag) - 1 - t.len = (t.bitsPerTag*t.kTagsPerBucket*t.numBuckets + 7) >> 3 + t.len = (uint64(t.bitsPerTag)*uint64(t.kTagsPerBucket)*t.numBuckets + 7) >> 3 buckets, err := getBucketsFromHint(initialBucketsHint, t.len) if err != nil { return err } t.bucket = buckets + t.totalSlots = uint64(t.kTagsPerBucket) * t.numBuckets return nil } // NumBuckets return num of table buckets -func 
(t *SingleTable) NumBuckets() uint { +func (t *SingleTable) NumBuckets() uint64 { return t.numBuckets } // SizeInBytes return bytes occupancy of table -func (t *SingleTable) SizeInBytes() uint { +func (t *SingleTable) SizeInBytes() uint64 { return t.len } // SizeInTags return num of tags that table can store -func (t *SingleTable) SizeInTags() uint { - return t.kTagsPerBucket * t.numBuckets +func (t *SingleTable) SizeInTags() uint64 { + return t.totalSlots } // BitsPerItem return bits occupancy per item of table @@ -65,8 +67,8 @@ func (t *SingleTable) BitsPerItem() uint { } // ReadTag read tag from bucket(i,j) -func (t *SingleTable) ReadTag(i, j uint) uint32 { - pos := (i*t.bitsPerTag*t.kTagsPerBucket + t.bitsPerTag*j) / bitsPerByte +func (t *SingleTable) ReadTag(i, j uint64) uint32 { + pos := (i*uint64(t.bitsPerTag)*uint64(t.kTagsPerBucket) + uint64(t.bitsPerTag)*j) / bitsPerByte var tag uint32 /* following code only works for little-endian */ switch t.bitsPerTag { @@ -89,12 +91,12 @@ func (t *SingleTable) ReadTag(i, j uint) uint32 { return tag & t.tagMask } -func (t *SingleTable) readOutBytes(i, j, pos uint) uint32 { - rShift := (i*t.bitsPerTag*t.kTagsPerBucket + t.bitsPerTag*j) & (bitsPerByte - 1) +func (t *SingleTable) readOutBytes(i, j, pos uint64) uint32 { + rShift := (i*uint64(t.bitsPerTag)*uint64(t.kTagsPerBucket) + uint64(t.bitsPerTag)*j) & (bitsPerByte - 1) // tag is max 32bit, so max occupies 5 bytes - kBytes := (rShift + t.bitsPerTag + 7) / bitsPerByte + kBytes := (rShift + uint64(t.bitsPerTag) + 7) / bitsPerByte var tmp uint64 - for k := uint(0); k < kBytes; k++ { + for k := uint64(0); k < kBytes; k++ { tmp |= uint64(t.bucket[pos+k]) << (bitsPerByte * k) } tmp >>= rShift @@ -102,8 +104,8 @@ func (t *SingleTable) readOutBytes(i, j, pos uint) uint32 { } // WriteTag write tag into bucket(i,j) -func (t *SingleTable) WriteTag(i, j uint, n uint32) { - pos := (i*t.bitsPerTag*t.kTagsPerBucket + t.bitsPerTag*j) / bitsPerByte +func (t *SingleTable) WriteTag(i, j uint64, n uint32) { + pos := (i*uint64(t.bitsPerTag)*uint64(t.kTagsPerBucket) + uint64(t.bitsPerTag)*j) / bitsPerByte tag := n & t.tagMask /* following code only works for little-endian */ switch t.bitsPerTag { @@ -146,11 +148,11 @@ func (t *SingleTable) WriteTag(i, j uint, n uint32) { } } -func (t *SingleTable) writeInBytes(i, j, pos uint, tag uint32) { - rShift := (i*t.bitsPerTag*t.kTagsPerBucket + t.bitsPerTag*j) & (bitsPerByte - 1) - lShift := (rShift + t.bitsPerTag) & (bitsPerByte - 1) +func (t *SingleTable) writeInBytes(i, j, pos uint64, tag uint32) { + rShift := (i*uint64(t.bitsPerTag)*uint64(t.kTagsPerBucket) + uint64(t.bitsPerTag)*j) & (bitsPerByte - 1) + lShift := (rShift + uint64(t.bitsPerTag)) & (bitsPerByte - 1) // tag is max 32bit, so max occupies 5 bytes - kBytes := (rShift + t.bitsPerTag + 7) / bitsPerByte + kBytes := (rShift + uint64(t.bitsPerTag) + 7) / bitsPerByte rMask := uint8(0xff) >> (bitsPerByte - rShift) lMask := uint8(0xff) << lShift @@ -163,15 +165,15 @@ func (t *SingleTable) writeInBytes(i, j, pos uint, tag uint32) { tmp |= uint64(t.bucket[pos+end]&lMask) << (end * bitsPerByte) tmp |= uint64(tag) << rShift - for k := uint(0); k < kBytes; k++ { + for k := uint64(0); k < kBytes; k++ { t.bucket[pos+k] = byte(tmp >> (k * bitsPerByte)) } } // FindTagInBuckets find if tag in bucket i1 i2 -func (t *SingleTable) FindTagInBuckets(i1, i2 uint, tag uint32) bool { - var j uint - for j = 0; j < t.kTagsPerBucket; j++ { +func (t *SingleTable) FindTagInBuckets(i1, i2 uint64, tag uint32) bool { + var j uint64 + for j 
= 0; j < uint64(t.kTagsPerBucket); j++ { if t.ReadTag(i1, j) == tag || t.ReadTag(i2, j) == tag { return true } @@ -180,9 +182,9 @@ func (t *SingleTable) FindTagInBuckets(i1, i2 uint, tag uint32) bool { } // DeleteTagFromBucket delete tag from bucket i -func (t *SingleTable) DeleteTagFromBucket(i uint, tag uint32) bool { - var j uint - for j = 0; j < t.kTagsPerBucket; j++ { +func (t *SingleTable) DeleteTagFromBucket(i uint64, tag uint32) bool { + var j uint64 + for j = 0; j < uint64(t.kTagsPerBucket); j++ { if t.ReadTag(i, j) == tag { t.WriteTag(i, j, 0) return true @@ -192,16 +194,16 @@ func (t *SingleTable) DeleteTagFromBucket(i uint, tag uint32) bool { } // InsertTagToBucket insert tag into bucket i -func (t *SingleTable) InsertTagToBucket(i uint, tag uint32, kickOut bool, oldTag *uint32) bool { - var j uint - for j = 0; j < t.kTagsPerBucket; j++ { +func (t *SingleTable) InsertTagToBucket(i uint64, tag uint32, kickOut bool, oldTag *uint32) bool { + var j uint64 + for j = 0; j < uint64(t.kTagsPerBucket); j++ { if t.ReadTag(i, j) == 0 { t.WriteTag(i, j, tag) return true } } if kickOut { - r := uint(rand.Int31()) % t.kTagsPerBucket + r := uint64(uint(rand.Int31()) % t.kTagsPerBucket) *oldTag = t.ReadTag(i, r) t.WriteTag(i, r, tag) } @@ -224,22 +226,46 @@ func (t *SingleTable) Info() string { t.bitsPerTag, t.kTagsPerBucket, t.numBuckets, t.SizeInTags()) } -const singleTableMetadataSize = 3 + bytesPerUint32 +const ( + singleTableMetadataSize = 3 + bytesPerUint64 + singleTableMetadataSizeLegacy = 3 + bytesPerUint32 +) + +// Reader returns a reader representing a TableBucket +func (t *SingleTable) Reader(legacy bool) (io.Reader, uint64) { + var metadata []byte + if legacy { + metadata = make([]byte, singleTableMetadataSizeLegacy) + } else { + metadata = make([]byte, singleTableMetadataSize) + } -// Encode returns a byte slice representing a TableBucket -func (t *SingleTable) Reader() (io.Reader, uint) { - var metadata [singleTableMetadataSize]byte metadata[0] = uint8(TableTypeSingle) metadata[1] = uint8(t.kTagsPerBucket) metadata[2] = uint8(t.bitsPerTag) - binary.LittleEndian.PutUint32(metadata[3:], uint32(t.numBuckets)) - return io.MultiReader(bytes.NewReader(metadata[:]), bytes.NewReader(t.bucket)), uint(len(metadata) + len(t.bucket)) + + if legacy { + binary.LittleEndian.PutUint32(metadata[3:], uint32(t.numBuckets)) + } else { + binary.LittleEndian.PutUint64(metadata[3:], t.numBuckets) + } + + return io.MultiReader(bytes.NewReader(metadata[:]), bytes.NewReader(t.bucket)), uint64(len(metadata)) + uint64(len(t.bucket)) } // Decode parse a byte slice into a TableBucket -func (t *SingleTable) Decode(b []byte) error { +func (t *SingleTable) Decode(legacy bool, b []byte) error { tagsPerBucket := uint(b[1]) bitsPerTag := uint(b[2]) - numBuckets := uint(binary.LittleEndian.Uint32(b[3:])) - return t.Init(tagsPerBucket, bitsPerTag, numBuckets, b[7:]) + + var numBuckets, offset uint64 + if legacy { + numBuckets = uint64(binary.LittleEndian.Uint32(b[3:])) + offset = singleTableMetadataSizeLegacy + } else { + numBuckets = binary.LittleEndian.Uint64(b[3:]) + offset = singleTableMetadataSize + } + + return t.Init(tagsPerBucket, bitsPerTag, numBuckets, b[offset:]) } diff --git a/util.go b/util.go index 9777123..9695228 100644 --- a/util.go +++ b/util.go @@ -8,12 +8,12 @@ package cuckoo import "fmt" const ( - bitsPerByte = 8 - bytesPerUint64 = 8 - bytesPerUint32 = 4 + bitsPerByte uint64 = 8 + bytesPerUint64 = 8 + bytesPerUint32 = 4 ) -func getNextPow2(n uint64) uint { +func getNextPow2(n uint64) uint64 { n-- 
n |= n >> 1 n |= n >> 2 @@ -22,7 +22,7 @@ func getNextPow2(n uint64) uint { n |= n >> 16 n |= n >> 32 n++ - return uint(n) + return n } func maxLoadFactor(tagsPerBucket uint) float64 { @@ -36,12 +36,12 @@ func maxLoadFactor(tagsPerBucket uint) float64 { } } -func getBucketsFromHint(initialBucketsHint []byte, expectedLength uint) ([]byte, error) { +func getBucketsFromHint(initialBucketsHint []byte, expectedLength uint64) ([]byte, error) { result := initialBucketsHint if len(result) == 0 { result = make([]byte, expectedLength) } - if uint(len(result)) != expectedLength { + if uint64(len(result)) != expectedLength { return nil, fmt.Errorf("buckets length should be %d but got %d", expectedLength, len(result)) } return result, nil
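
For reviewers trying out this change, the round trip below is a minimal sketch of the
patched API. The import path and alias are assumptions for illustration; NewFilter,
Encode, Decode, Contain, and the marker byte behavior are taken from the diff above.

package main

import (
	"fmt"
	"log"

	cuckoo "github.com/linvon/cuckoo-filter" // import path assumed for this sketch
)

func main() {
	// 4 tags per bucket, 9-bit fingerprints, ~1M keys, single-table layout.
	cf := cuckoo.NewFilter(4, 9, 1<<20, cuckoo.TableTypeSingle)
	cf.Add([]byte("hello"))

	// New uint64-based format: the serialization is prefixed with the 13-byte marker.
	newBytes, err := cf.Encode(false)
	if err != nil {
		log.Fatal(err)
	}

	// Legacy uint32-based format: readable by pre-patch versions of the library.
	legacyBytes, err := cf.Encode(true)
	if err != nil {
		log.Fatal(err)
	}

	// Byte 12 disambiguates the two layouts: in the legacy format it is the
	// victimUsed boolean (0 or 1); in the new format it is the final marker
	// byte '!' (0x21).
	fmt.Printf("legacy[12]=%#x new[12]=%#x\n", legacyBytes[12], newBytes[12])

	// Decoding is format-agnostic: both encodings restore an equivalent filter.
	for _, buf := range [][]byte{legacyBytes, newBytes} {
		f, err := cuckoo.Decode(buf)
		if err != nil {
			log.Fatal(err)
		}
		fmt.Println(f.Contain([]byte("hello"))) // true
	}
}

Note that encoding with legacy=true casts the counts down to uint32, so it is only
appropriate for filters that stay within the old max(uint32) limits; legacy=false
should be preferred once all readers have picked up this change.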