Skip to content

Commit

Permalink
statistics: use MurmurPool to reuse the memory (#47015)
Browse files Browse the repository at this point in the history
close #47016
  • Loading branch information
hawkingrei authored Sep 18, 2023
1 parent 1b83f20 commit 35add6f
Showing 1 changed file with 25 additions and 18 deletions.
43 changes: 25 additions & 18 deletions statistics/fmsketch.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ package statistics

import (
"hash"
"sync"

"github.com/pingcap/errors"
"github.com/pingcap/tidb/sessionctx/stmtctx"
Expand All @@ -25,20 +26,24 @@ import (
"github.com/twmb/murmur3"
)

// murmur3Pool hands out reusable 64-bit murmur3 hashers so that an insert
// does not have to allocate a fresh hasher each time.
// A pooled hasher may carry state from an earlier use; the callers in this
// file call Reset right after Get and return it with a deferred Put.
var murmur3Pool = sync.Pool{
New: func() any {
return murmur3.New64()
},
}

// FMSketch is used to count the number of distinct elements in a set.
type FMSketch struct {
hashFunc hash.Hash64
hashset map[uint64]bool
mask uint64
maxSize int
// hashset is allocated empty by NewFMSketch; Copy builds a separate map for the new sketch.
hashset map[uint64]bool
// mask is left at its zero value by NewFMSketch and carried over unchanged by Copy.
mask uint64
// maxSize is the value passed to NewFMSketch.
maxSize int
}

// NewFMSketch returns a new FM sketch.
// The sketch starts with an empty hashset and stores maxSize as given; mask is
// left at its zero value.
func NewFMSketch(maxSize int) *FMSketch {
return &FMSketch{
hashset: make(map[uint64]bool),
maxSize: maxSize,
hashFunc: murmur3.New64(),
hashset: make(map[uint64]bool),
maxSize: maxSize,
}
}

Expand All @@ -52,10 +57,9 @@ func (s *FMSketch) Copy() *FMSketch {
hashset[key] = value
}
return &FMSketch{
hashset: hashset,
mask: s.mask,
maxSize: s.maxSize,
hashFunc: murmur3.New64(),
hashset: hashset,
mask: s.mask,
maxSize: s.maxSize,
}
}

Expand Down Expand Up @@ -88,31 +92,35 @@ func (s *FMSketch) InsertValue(sc *stmtctx.StatementContext, value types.Datum)
if err != nil {
return errors.Trace(err)
}
s.hashFunc.Reset()
_, err = s.hashFunc.Write(bytes)
hashFunc := murmur3Pool.Get().(hash.Hash64)
hashFunc.Reset()
defer murmur3Pool.Put(hashFunc)
_, err = hashFunc.Write(bytes)
if err != nil {
return errors.Trace(err)
}
s.insertHashValue(s.hashFunc.Sum64())
s.insertHashValue(hashFunc.Sum64())
return nil
}

// InsertRowValue inserts multi-column values to the sketch.
// Each datum in values is encoded with codec.EncodeValue and written, in order,
// into one hasher; the single 64-bit sum over the whole row is then passed to
// insertHashValue. The first encode or write error is returned as-is, and
// nothing is inserted in that case.
func (s *FMSketch) InsertRowValue(sc *stmtctx.StatementContext, values []types.Datum) error {
b := make([]byte, 0, 8)
s.hashFunc.Reset()
hashFunc := murmur3Pool.Get().(hash.Hash64)
// Reset before use: a hasher taken from the pool may hold state from a previous caller.
hashFunc.Reset()
defer murmur3Pool.Put(hashFunc)
for _, v := range values {
b = b[:0]
// NOTE(review): `:=` declares a new b (and err) scoped to the loop body, so the
// outer b is never reassigned. Presumably EncodeValue appends to its buffer
// argument; if so, any growth beyond the initial 8-byte capacity is thrown away
// each iteration. Confirm, then consider `var err error; b, err = ...` to reuse it.
b, err := codec.EncodeValue(sc, b, v)
if err != nil {
return err
}
_, err = s.hashFunc.Write(b)
_, err = hashFunc.Write(b)
if err != nil {
return err
}
}
s.insertHashValue(s.hashFunc.Sum64())
s.insertHashValue(hashFunc.Sum64())
return nil
}

Expand Down Expand Up @@ -188,7 +196,6 @@ func DecodeFMSketch(data []byte) (*FMSketch, error) {

// MemoryUsage returns the total memory usage of a FMSketch.
func (s *FMSketch) MemoryUsage() (sum int64) {
// In FMSketch, we will ignore the memory usage of `hashFunc`.
// mask (uint64) and maxSize (int) each consume 8 bytes, which is where the constant 16 comes from.
// Each hashset (map[uint64]bool) entry consumes 9 bytes (8 for the uint64 key + 1 for the bool value).
sum = int64(16 + 9*len(s.hashset))
Expand Down

0 comments on commit 35add6f

Please sign in to comment.