From 854612bcf1a4702c5b4a330ebe0444706ee1d797 Mon Sep 17 00:00:00 2001 From: Alexander Petrov Date: Fri, 20 Sep 2019 13:09:59 +0100 Subject: [PATCH 1/4] Implement shotgun4Intersect --- setutil.go | 98 +++++++++++++++++++++++++++++++++++++++++++++++++ setutil_test.go | 79 ++++++++++++++++++++++++++++++++++----- 2 files changed, 168 insertions(+), 9 deletions(-) diff --git a/setutil.go b/setutil.go index 3e8c01dd..2419c866 100644 --- a/setutil.go +++ b/setutil.go @@ -582,6 +582,104 @@ mainwhile: return pos } +// shotgun4Intersect performs intersection between small and large arrays described in +// https://lemire.me/blog/2019/01/16/faster-intersections-between-sorted-arrays-with-shotgun/ +func shotgun4Intersect(small, large, buf []uint16) int { + if len(small) == 0 { + return 0 + } + + nS, nL := len(small), len(large) + buf = buf[:cap(buf)] + idxS, idxL := 0, 0 + pos := 0 + + for (idxS+4 <= nS) && idxL < nL { + t1, t2, t3, t4 := small[idxS], small[idxS+1], small[idxS+2], small[idxS+3] + idx1, idx2, idx3, idx4 := idxL, idxL, idxL, idxL + n := nL - idxL + + for n > 1 { + m := n >> 1 + + if large[idx1+m] < t1 { + idx1 += m + } + + if large[idx2+m] < t2 { + idx2 += m + } + + if large[idx3+m] < t3 { + idx3 += m + } + + if large[idx4+m] < t4 { + idx4 += m + } + + n -= m + } + + if large[idx1] < t1 { + idx1++ + } + + if large[idx2] < t2 { + idx2++ + } + + if large[idx3] < t3 { + idx3++ + } + + if large[idx4] < t4 { + idx4++ + } + + if idx1 < nL && large[idx1] == t1 { + buf[pos] = t1 + pos++ + } + + if idx2 < nL && large[idx2] == t2 { + buf[pos] = t2 + pos++ + } + + if idx3 < nL && large[idx3] == t3 { + buf[pos] = t3 + pos++ + } + + if idx4 < nL && large[idx4] == t4 { + buf[pos] = t4 + pos++ + } + + idxS += 4 + idxL = idx4 + } + + for idxS < nS && idxL < nL { + s := small[idxS] + idxL = advanceUntil(large, idxL, nL, s) + + if idxL == nL { + break + } + + if large[idxL] == s { + buf[pos] = s + pos++ + } + + idxS++ + } + + return pos +} + func binarySearch(array []uint16, ikey 
uint16) int { low := 0 high := len(array) - 1 diff --git a/setutil_test.go b/setutil_test.go index b037e02a..5b8c508f 100644 --- a/setutil_test.go +++ b/setutil_test.go @@ -4,6 +4,7 @@ package roaring import ( "github.com/stretchr/testify/assert" + "math/rand" "testing" ) @@ -92,16 +93,31 @@ func TestSetUtilIntersection(t *testing.T) { assert.Equal(t, expectedresult, result) } -func TestSetUtilIntersection2(t *testing.T) { - data1 := []uint16{0, 2, 4, 6, 8, 10, 12, 14, 16, 18} - data2 := []uint16{0, 3, 6, 9, 12, 15, 18} - result := make([]uint16, 0, len(data1)+len(data2)) - expectedresult := []uint16{0, 6, 12, 18} - nl := intersection2by2(data1, data2, result) - result = result[:nl] - result = result[:len(expectedresult)] +func TestSetUtilIntersectionCases(t *testing.T) { + cases := []struct { + name string + algo func(a, b, buf []uint16) int + }{ + { + name: "onesidedgallopingintersect2by2", + algo: onesidedgallopingintersect2by2, + }, + { + name: "shotgun4Intersect", + algo: shotgun4Intersect, + }, + } - assert.Equal(t, expectedresult, result) + data1 := []uint16{0, 3, 6, 9, 12, 15, 18} + data2 := []uint16{0, 2, 4, 6, 8, 10, 12, 14, 16, 18} + expected := []uint16{0, 6, 12, 18} + + for _, c := range cases { + result := make([]uint16, 0, len(data1)+len(data2)) + n := c.algo(data1, data2, result) + + assert.Equalf(t, expected, result[:n], "failed algorithm: %s", c.name) + } } func TestSetUtilBinarySearch(t *testing.T) { @@ -119,3 +135,48 @@ func TestSetUtilBinarySearch(t *testing.T) { } } } + +func BenchmarkIntersectAlgorithms(b *testing.B) { + sz1 := 1000 + s1 := make([]uint16, sz1) + + sz2 := MaxUint16 + s2 := make([]uint16, sz2) + + for i := 0; i < sz2; i++ { + s2[i] = uint16(i) + } + + r := rand.New(rand.NewSource(0)) + k := 0 + + for i := 0; i < sz1 && k < sz2; i++ { + n := r.Intn(100) + k += n + + // prevent adding duplicates + if n == 0 && i > 0 { + k++ + } + + s1[i] = uint16(s2[k]) + } + + buf := make([]uint16, sz1+sz2) + + 
b.Run("onesidedgallopingintersect2by2", func(b *testing.B) { + b.ResetTimer() + + for i := 0; i < b.N; i++ { + onesidedgallopingintersect2by2(s1, s2, buf) + } + }) + + b.Run("shotgun4", func(b *testing.B) { + b.ResetTimer() + + for i := 0; i < b.N; i++ { + shotgun4Intersect(s1, s2, buf) + } + }) +} From e03e38d189f0910b3df9ebf7a293dd7d1d6618ca Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Wed, 25 Sep 2019 13:42:11 -0400 Subject: [PATCH 2/4] Proving that shotgun works. --- serialization_test.go | 2 +- setutil.go | 76 ++++++++++++++++++++++----------------- setutil_test.go | 83 ++++++++++++++++++++++++++++++++++--------- 3 files changed, 111 insertions(+), 50 deletions(-) diff --git a/serialization_test.go b/serialization_test.go index 80750736..50392d5b 100644 --- a/serialization_test.go +++ b/serialization_test.go @@ -419,7 +419,7 @@ func singleSliceInArray() (*Bitmap, []*Bitmap) { func singleSlice() *Bitmap { slice := make([]byte, 2) - return &Bitmap{highlowcontainer:roaringArray{keys: []uint16{0}, containers: []container{&arrayContainer{ byteSliceAsUint16Slice(slice)}}}} + return &Bitmap{highlowcontainer: roaringArray{keys: []uint16{0}, containers: []container{&arrayContainer{byteSliceAsUint16Slice(slice)}}}} } func TestByteSliceAsUint64Slice(t *testing.T) { diff --git a/setutil.go b/setutil.go index 2419c866..1bd7cbd7 100644 --- a/setutil.go +++ b/setutil.go @@ -582,6 +582,11 @@ mainwhile: return pos } +// returns -1 if x < y, zero otherwise +func branchlessComparator(x, y uint16) int { + return (int(x) - int(y)) >> 63 +} + // shotgun4Intersect performs intersection between small and large arrays described in // https://lemire.me/blog/2019/01/16/faster-intersections-between-sorted-arrays-with-shotgun/ func shotgun4Intersect(small, large, buf []uint16) int { @@ -601,58 +606,65 @@ func shotgun4Intersect(small, large, buf []uint16) int { for n > 1 { m := n >> 1 + l1, l2, l3, l4 := large[idx1+m], large[idx2+m], large[idx3+m], large[idx4+m] + idx1 += 
branchlessComparator(l1, t1) & m + idx2 += branchlessComparator(l2, t2) & m + idx3 += branchlessComparator(l3, t3) & m + idx4 += branchlessComparator(l4, t4) & m + n -= m + } - if large[idx1+m] < t1 { - idx1 += m + l1, l2, l3, l4 := large[idx1], large[idx2], large[idx3], large[idx4] + if idx4+1 < nL { // common case + idx1 -= branchlessComparator(l1, t1) + idx2 -= branchlessComparator(l2, t2) + idx3 -= branchlessComparator(l3, t3) + idx4 -= branchlessComparator(l4, t4) + l1, l2, l3, l4 = large[idx1], large[idx2], large[idx3], large[idx4] + } else { // slow path + if l1 < t1 { + idx1++ + if idx1 < nL { + l1 = large[idx1] + } } - - if large[idx2+m] < t2 { - idx2 += m + if l2 < t2 { + idx2++ + if idx2 < nL { + l2 = large[idx2] + } } - - if large[idx3+m] < t3 { - idx3 += m + if l3 < t3 { + idx3++ + if idx3 < nL { + l3 = large[idx3] + } } - - if large[idx4+m] < t4 { - idx4 += m + if l4 < t4 { + idx4++ + if idx4 < nL { + l4 = large[idx4] + } } - n -= m - } - - if large[idx1] < t1 { - idx1++ - } - - if large[idx2] < t2 { - idx2++ - } - - if large[idx3] < t3 { - idx3++ - } - - if large[idx4] < t4 { - idx4++ } - if idx1 < nL && large[idx1] == t1 { + if l1 == t1 { buf[pos] = t1 pos++ } - if idx2 < nL && large[idx2] == t2 { + if l2 == t2 { buf[pos] = t2 pos++ } - if idx3 < nL && large[idx3] == t3 { + if l3 == t3 { buf[pos] = t3 pos++ } - if idx4 < nL && large[idx4] == t4 { + if l4 == t4 { buf[pos] = t4 pos++ } diff --git a/setutil_test.go b/setutil_test.go index 5b8c508f..3d90d4fa 100644 --- a/setutil_test.go +++ b/setutil_test.go @@ -5,6 +5,7 @@ package roaring import ( "github.com/stretchr/testify/assert" "math/rand" + "sort" "testing" ) @@ -93,6 +94,7 @@ func TestSetUtilIntersection(t *testing.T) { assert.Equal(t, expectedresult, result) } +// go test -run TestSetUtilIntersectionCases func TestSetUtilIntersectionCases(t *testing.T) { cases := []struct { name string @@ -136,47 +138,94 @@ func TestSetUtilBinarySearch(t *testing.T) { } } +// go test -bench 
BenchmarkIntersectAlgorithms -run - func BenchmarkIntersectAlgorithms(b *testing.B) { - sz1 := 1000 + // sz1 is the small array + sz1 := 64 // this should not be *too* large s1 := make([]uint16, sz1) - sz2 := MaxUint16 + // to get more realistic results, we try different + // large array sizes. Our benchmark is going to be + // an average of those... + + sz2 := 3000 s2 := make([]uint16, sz2) - for i := 0; i < sz2; i++ { - s2[i] = uint16(i) - } + sz3 := 2040 + s3 := make([]uint16, sz3) - r := rand.New(rand.NewSource(0)) - k := 0 + sz4 := 1200 + s4 := make([]uint16, sz4) - for i := 0; i < sz1 && k < sz2; i++ { - n := r.Intn(100) - k += n + r := rand.New(rand.NewSource(1234)) - // prevent adding duplicates - if n == 0 && i > 0 { - k++ - } + // We are going to populate our large arrays with + // random data. Importantly, we need to sort. + // There might be a few duplicates, by random chance, + // but it should not affect results too much. - s1[i] = uint16(s2[k]) + for i := 0; i < sz2; i++ { + s2[i] = uint16(r.Intn(MaxUint16)) + } + sort.Sort(uint16Slice(s2)) + + for i := 0; i < sz3; i++ { + s3[i] = uint16(r.Intn(MaxUint16)) } + sort.Sort(uint16Slice(s3)) - buf := make([]uint16, sz1+sz2) + for i := 0; i < sz4; i++ { + s4[i] = uint16(r.Intn(MaxUint16)) + } + sort.Sort(uint16Slice(s4)) + + buf := make([]uint16, sz1+sz2+sz3+sz4) + commonseed := 123456 + r = rand.New(rand.NewSource(commonseed)) // we set the same seed in both instances b.Run("onesidedgallopingintersect2by2", func(b *testing.B) { + b.ResetTimer() for i := 0; i < b.N; i++ { + // this is important: you want to start with a new + // small array each time otherwise onesidedgallopingintersect2by2 + // might benefit from nearly perfect branch prediction, making + // the benchmark unrealistic. + // This needs to be super fast, which it should be if sz1 is + // small enough. 
+ for i := 0; i < sz1; i++ { + // This needs to be super fast + s1[i] = uint16(r.Intn(MaxUint16)) + } + sort.Sort(uint16Slice(s1)) // There might be duplicates, ignore them + onesidedgallopingintersect2by2(s1, s2, buf) + onesidedgallopingintersect2by2(s1, s3, buf) + onesidedgallopingintersect2by2(s1, s4, buf) + } }) + r = rand.New(rand.NewSource(commonseed)) // we set the same seed in both instances b.Run("shotgun4", func(b *testing.B) { b.ResetTimer() - for i := 0; i < b.N; i++ { + // this is important: you want to start with a new + // small array each time otherwise onesidedgallopingintersect2by2 + // might benefit from nearly perfect branch prediction, making + // the benchmark unrealistic. + // This needs to be super fast, which it should be if sz1 is + // small enough. + for i := 0; i < sz1; i++ { + s1[i] = uint16(r.Intn(MaxUint16)) + } + sort.Sort(uint16Slice(s1)) // There might be duplicates, ignore them + shotgun4Intersect(s1, s2, buf) + shotgun4Intersect(s1, s3, buf) + shotgun4Intersect(s1, s4, buf) + } }) } From fede5f78aec554cc8bbacda3d74dc91beba3499b Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Wed, 25 Sep 2019 13:49:58 -0400 Subject: [PATCH 3/4] Simple fix. 
--- setutil_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setutil_test.go b/setutil_test.go index 3d90d4fa..a2f391ed 100644 --- a/setutil_test.go +++ b/setutil_test.go @@ -181,7 +181,7 @@ func BenchmarkIntersectAlgorithms(b *testing.B) { buf := make([]uint16, sz1+sz2+sz3+sz4) commonseed := 123456 - r = rand.New(rand.NewSource(commonseed)) // we set the same seed in both instances + r = rand.New(rand.NewSource(int64(commonseed))) // we set the same seed in both instances b.Run("onesidedgallopingintersect2by2", func(b *testing.B) { @@ -206,7 +206,7 @@ func BenchmarkIntersectAlgorithms(b *testing.B) { } }) - r = rand.New(rand.NewSource(commonseed)) // we set the same seed in both instances + r = rand.New(rand.NewSource(int64(commonseed))) // we set the same seed in both instances b.Run("shotgun4", func(b *testing.B) { b.ResetTimer() From f4760127538ca2fa412056292a29a60ce2431344 Mon Sep 17 00:00:00 2001 From: Alexander Petrov Date: Tue, 8 Oct 2019 21:19:25 +0100 Subject: [PATCH 4/4] Added more unit tests, fixed bug with shotgun4Intersect (case {1}, {1}) --- setutil.go | 5 +++- setutil_test.go | 67 ++++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 62 insertions(+), 10 deletions(-) diff --git a/setutil.go b/setutil.go index 1bd7cbd7..ca9c977d 100644 --- a/setutil.go +++ b/setutil.go @@ -675,7 +675,10 @@ func shotgun4Intersect(small, large, buf []uint16) int { for idxS < nS && idxL < nL { s := small[idxS] - idxL = advanceUntil(large, idxL, nL, s) + + if s > large[idxL] { + idxL = advanceUntil(large, idxL, nL, s) + } if idxL == nL { break diff --git a/setutil_test.go b/setutil_test.go index a2f391ed..ae7cbad0 100644 --- a/setutil_test.go +++ b/setutil_test.go @@ -3,10 +3,11 @@ package roaring // to run just these tests: go test -run TestSetUtil* import ( - "github.com/stretchr/testify/assert" "math/rand" "sort" "testing" + + "github.com/stretchr/testify/assert" ) func TestSetUtilDifference(t *testing.T) { @@ -96,7 +97,7 @@ 
func TestSetUtilIntersection(t *testing.T) { // go test -run TestSetUtilIntersectionCases func TestSetUtilIntersectionCases(t *testing.T) { - cases := []struct { + algorithms := []struct { name string algo func(a, b, buf []uint16) int }{ @@ -110,15 +111,63 @@ func TestSetUtilIntersectionCases(t *testing.T) { }, } - data1 := []uint16{0, 3, 6, 9, 12, 15, 18} - data2 := []uint16{0, 2, 4, 6, 8, 10, 12, 14, 16, 18} - expected := []uint16{0, 6, 12, 18} + cases := []struct { + a, b, expected []uint16 + }{ + { + a: []uint16{}, + b: []uint16{}, + expected: []uint16{}, + }, + { + a: []uint16{1}, + b: []uint16{1}, + expected: []uint16{1}, + }, + { + a: []uint16{1}, + b: []uint16{2}, + expected: []uint16{}, + }, + { + a: []uint16{1, 2}, + b: []uint16{2, 3}, + expected: []uint16{2}, + }, + { + a: []uint16{1, 2, 3}, + b: []uint16{0, 2, 4, 6, 8, 10, 12, 14, 16, 18}, + expected: []uint16{2}, + }, + { + a: []uint16{0, 3, 6, 9, 12, 15, 18}, + b: []uint16{0, 2, 4, 6, 8, 10, 12, 14, 16, 18}, + expected: []uint16{0, 6, 12, 18}, + }, + { + a: []uint16{0, 3, 6, 9, 12, 15, 18}, + b: []uint16{0, 3, 6, 9, 12, 15, 18}, + expected: []uint16{0, 3, 6, 9, 12, 15, 18}, + }, + { + a: []uint16{1, 2, 3, 5, 7, 11, 13, 16, 30, 40, 100, 131, 200}, + b: []uint16{10, 60, 100}, + expected: []uint16{100}, + }, + { + a: []uint16{10, 60, 100}, + b: []uint16{1, 2, 3, 5, 7, 11, 13, 16, 30, 40, 100, 131, 200}, + expected: []uint16{100}, + }, + } - for _, c := range cases { - result := make([]uint16, 0, len(data1)+len(data2)) - n := c.algo(data1, data2, result) + for _, a := range algorithms { + for i, c := range cases { + result := make([]uint16, 0, len(c.a)+len(c.b)) + n := a.algo(c.a, c.b, result) - assert.Equalf(t, expected, result[:n], "failed algorithm: %s", c.name) + assert.Equalf(t, c.expected, result[:n], "test %d fail, algorithm: %s", i+1, a.name) + } } }