Skip to content

Commit ccca8cb

Browse files
authored
Merge pull request #47 from axw/binaryappender
Implement BinaryAppender
2 parents 30a916b + 44cf830 commit ccca8cb

File tree

4 files changed

+107
-52
lines changed

4 files changed

+107
-52
lines changed

compressed.go

+24-27
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
package hyperloglog
22

3-
import "encoding/binary"
3+
import (
4+
"encoding/binary"
5+
"slices"
6+
)
47

58
// Original author of this file is github.com/clarkduvall/hyperloglog
69
type iterable interface {
@@ -52,32 +55,26 @@ func (v *compressedList) Clone() *compressedList {
5255
return newV
5356
}
5457

55-
func (v *compressedList) MarshalBinary() (data []byte, err error) {
56-
// Marshal the variableLengthList
57-
bdata, err := v.b.MarshalBinary()
58-
if err != nil {
59-
return nil, err
60-
}
61-
62-
// At least 4 bytes for the two fixed sized values plus the size of bdata.
63-
data = make([]byte, 0, 4+4+len(bdata))
58+
func (v *compressedList) AppendBinary(data []byte) ([]byte, error) {
59+
// At least 8 bytes for the two fixed sized values (4 bytes each)
60+
data = slices.Grow(data, 4+4)
6461

6562
// Marshal the count and last values.
66-
data = append(data, []byte{
63+
data = append(data,
6764
// Number of items in the list.
68-
byte(v.count >> 24),
69-
byte(v.count >> 16),
70-
byte(v.count >> 8),
65+
byte(v.count>>24),
66+
byte(v.count>>16),
67+
byte(v.count>>8),
7168
byte(v.count),
7269
// The last item in the list.
73-
byte(v.last >> 24),
74-
byte(v.last >> 16),
75-
byte(v.last >> 8),
70+
byte(v.last>>24),
71+
byte(v.last>>16),
72+
byte(v.last>>8),
7673
byte(v.last),
77-
}...)
74+
)
7875

79-
// Append the list
80-
return append(data, bdata...), nil
76+
// Append the variableLengthList
77+
return v.b.AppendBinary(data)
8178
}
8279

8380
func (v *compressedList) UnmarshalBinary(data []byte) error {
@@ -130,20 +127,20 @@ func (v *compressedList) Iter() *iterator {
130127

131128
type variableLengthList []uint8
132129

133-
func (v variableLengthList) MarshalBinary() (data []byte, err error) {
130+
func (v variableLengthList) AppendBinary(data []byte) ([]byte, error) {
134131
// 4 bytes for the size of the list, and a byte for each element in the
135132
// list.
136-
data = make([]byte, 0, 4+v.Len())
133+
data = slices.Grow(data, 4+v.Len())
137134

138135
// Length of the list. We only need 32 bits because the size of the list
139136
// couldn't exceed that on 32 bit architectures.
140137
sz := v.Len()
141-
data = append(data, []byte{
142-
byte(sz >> 24),
143-
byte(sz >> 16),
144-
byte(sz >> 8),
138+
data = append(data,
139+
byte(sz>>24),
140+
byte(sz>>16),
141+
byte(sz>>8),
145142
byte(sz),
146-
}...)
143+
)
147144

148145
// Marshal each element in the list.
149146
for i := 0; i < sz; i++ {

hyperloglog.go

+17-13
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"errors"
66
"fmt"
77
"math"
8+
"slices"
89
"sort"
910
)
1011

@@ -203,8 +204,16 @@ func (sk *Sketch) mergeSparse() {
203204
}
204205

205206
// MarshalBinary implements the encoding.BinaryMarshaler interface.
207+
//
208+
// When the result will be appended to another buffer, consider using
209+
// AppendBinary to avoid additional allocations and copying.
206210
func (sk *Sketch) MarshalBinary() (data []byte, err error) {
207-
data = make([]byte, 0, 8+len(sk.regs))
211+
return sk.AppendBinary(nil)
212+
}
213+
214+
// AppendBinary implements the encoding.BinaryAppender interface.
215+
func (sk *Sketch) AppendBinary(data []byte) ([]byte, error) {
216+
data = slices.Grow(data, 8+len(sk.regs))
208217
// Marshal a version marker.
209218
data = append(data, version)
210219
// Marshal p.
@@ -217,31 +226,26 @@ func (sk *Sketch) MarshalBinary() (data []byte, err error) {
217226
data = append(data, byte(1))
218227

219228
// Add the tmp_set
220-
tsdata, err := sk.tmpSet.MarshalBinary()
229+
data, err := sk.tmpSet.AppendBinary(data)
221230
if err != nil {
222231
return nil, err
223232
}
224-
data = append(data, tsdata...)
225233

226234
// Add the sparse Sketch
227-
sdata, err := sk.sparseList.MarshalBinary()
228-
if err != nil {
229-
return nil, err
230-
}
231-
return append(data, sdata...), nil
235+
return sk.sparseList.AppendBinary(data)
232236
}
233237

234238
// It's using the dense Sketch.
235239
data = append(data, byte(0))
236240

237241
// Add the dense Sketch.
238242
sz := len(sk.regs)
239-
data = append(data, []byte{
240-
byte(sz >> 24),
241-
byte(sz >> 16),
242-
byte(sz >> 8),
243+
data = append(data,
244+
byte(sz>>24),
245+
byte(sz>>16),
246+
byte(sz>>8),
243247
byte(sz),
244-
}...)
248+
)
245249

246250
// Marshal each element in the list.
247251
for _, v := range sk.regs {

hyperloglog_test.go

+53
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"math"
88
"math/rand"
99
"reflect"
10+
"slices"
1011
"testing"
1112

1213
"github.com/davecgh/go-spew/spew"
@@ -469,6 +470,58 @@ func TestHLL_Unmarshal_ErrorTooShort(t *testing.T) {
469470
}
470471
}
471472

473+
func TestHLL_AppendBinary(t *testing.T) {
474+
sk := NewTestSketch(16)
475+
for i := 0; i < 10; i++ {
476+
sk.InsertHash(uint64(rand.Int()))
477+
}
478+
data1, err := sk.MarshalBinary()
479+
require.NoError(t, err)
480+
481+
bufSize := rand.Intn(100)
482+
buf := make([]byte, bufSize)
483+
for i := range buf {
484+
buf[i] = byte(rand.Intn(256))
485+
}
486+
bufCopy := slices.Clone(buf)
487+
data2, err := sk.AppendBinary(buf)
488+
require.NoError(t, err)
489+
490+
require.Len(t, data2, len(data1)+len(bufCopy))
491+
require.Equal(t, bufCopy, data2[:len(bufCopy)])
492+
require.Equal(t, data1, data2[len(bufCopy):])
493+
}
494+
495+
func Benchmark_HLL_Marshal(b *testing.B) {
496+
run := func(precision uint8, sparse bool) {
497+
name := fmt.Sprintf("precision%d_", precision)
498+
if sparse {
499+
name += "sparse"
500+
} else {
501+
name += "dense"
502+
}
503+
b.Run(name, func(b *testing.B) {
504+
sk, _ := NewSketch(precision, sparse)
505+
for i := 0; i < 1000; i++ {
506+
sk.InsertHash(uint64(rand.Int()))
507+
}
508+
b.Run("MarshalBinary", func(b *testing.B) {
509+
for i := 0; i < b.N; i++ {
510+
_, _ = sk.MarshalBinary()
511+
}
512+
})
513+
b.Run("AppendBinary", func(b *testing.B) {
514+
var buf []byte
515+
for i := 0; i < b.N; i++ {
516+
buf, _ = sk.AppendBinary(buf[:0])
517+
}
518+
})
519+
})
520+
}
521+
run(16, true)
522+
run(16, false)
523+
}
524+
472525
func TestHLL_Clone(t *testing.T) {
473526
sk1 := NewTestSketch(16)
474527

sparse.go

+13-12
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package hyperloglog
22

33
import (
44
"math/bits"
5+
"slices"
56

67
"github.com/kamstrup/intmap"
78
)
@@ -83,29 +84,29 @@ func (s *set) Clone() *set {
8384
return &set{m: newS}
8485
}
8586

86-
func (s *set) MarshalBinary() (data []byte, err error) {
87+
func (s *set) AppendBinary(data []byte) ([]byte, error) {
8788
// 4 bytes for the size of the set, and 4 bytes for each key in the
8889
// set.
89-
data = make([]byte, 0, 4+(4*s.m.Len()))
90+
data = slices.Grow(data, 4+(4*s.m.Len()))
9091

9192
// Length of the set. We only need 32 bits because the size of the set
9293
// couldn't exceed that on 32 bit architectures.
9394
sl := s.m.Len()
94-
data = append(data, []byte{
95-
byte(sl >> 24),
96-
byte(sl >> 16),
97-
byte(sl >> 8),
95+
data = append(data,
96+
byte(sl>>24),
97+
byte(sl>>16),
98+
byte(sl>>8),
9899
byte(sl),
99-
}...)
100+
)
100101

101102
// Marshal each element in the set.
102103
s.m.ForEach(func(k uint32) bool {
103-
data = append(data, []byte{
104-
byte(k >> 24),
105-
byte(k >> 16),
106-
byte(k >> 8),
104+
data = append(data,
105+
byte(k>>24),
106+
byte(k>>16),
107+
byte(k>>8),
107108
byte(k),
108-
}...)
109+
)
109110
return true
110111
})
111112

0 commit comments

Comments
 (0)