Skip to content

Commit

Permalink
Improve lz4 compression (#2614)
Browse files Browse the repository at this point in the history
* Improve lz4 compression.

- Move to v4.
- Remove not required checksuming.
- Default will now write 4M blocks, it's backward compatible for reads.

Signed-off-by: Cyril Tovena <cyril.tovena@gmail.com>

* vendor update

Signed-off-by: Cyril Tovena <cyril.tovena@gmail.com>
  • Loading branch information
cyriltovena authored Sep 30, 2020
1 parent 1b2bae4 commit 6500f82
Show file tree
Hide file tree
Showing 39 changed files with 1,732 additions and 1,166 deletions.
4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ require (
github.com/dustin/go-humanize v1.0.0
github.com/fatih/color v1.9.0
github.com/fluent/fluent-bit-go v0.0.0-20190925192703-ea13c021720c
github.com/frankban/quicktest v1.7.2 // indirect
github.com/go-kit/kit v0.10.0
github.com/go-logfmt/logfmt v0.5.0
github.com/gofrs/flock v0.7.1 // indirect
Expand All @@ -40,7 +39,8 @@ require (
github.com/mitchellh/mapstructure v1.2.2
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f
github.com/opentracing/opentracing-go v1.2.0
github.com/pierrec/lz4 v2.5.3-0.20200429092203-e876bbd321b3+incompatible
// github.com/pierrec/lz4 v2.0.5+incompatible
github.com/pierrec/lz4/v4 v4.0.2-0.20200813132121-22f5d580d5c4
github.com/pkg/errors v0.9.1
github.com/prometheus/client_golang v1.7.1
github.com/prometheus/client_model v0.2.0
Expand Down
15 changes: 11 additions & 4 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ cloud.google.com/go/storage v1.3.0/go.mod h1:9IAwXhoyBJ7z9LcAwkj0/7NnPzYaPeZxxVp
cloud.google.com/go/storage v1.5.0/go.mod h1:tpKbwo567HUNpVclU5sGELwQWBDZ8gh0ZeosJ0Rtdos=
cloud.google.com/go/storage v1.6.0 h1:UDpwYIwla4jHGzZJaEJYx1tOejbgSoNqsAfHAUYe2r8=
cloud.google.com/go/storage v1.6.0/go.mod h1:N7U0C8pVQ/+NIKOBQyamJIeKQKkZ+mxpohlUTyfDhBk=
code.cloudfoundry.org/bytefmt v0.0.0-20200131002437-cf55d5288a48/go.mod h1:wN/zk7mhREp/oviagqUXY3EwuHhWyOvAdsn5Y4CzOrc=
collectd.org v0.3.0/go.mod h1:A/8DzQBkF6abtvrT2j/AU/4tiBgJWYyh0y/oB/4MlWE=
contrib.go.opencensus.io/exporter/ocagent v0.6.0/go.mod h1:zmKjrJcdo0aYcVS7bmEeSEBLPA9YJp5bjrofdU3pIXs=
dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=
Expand Down Expand Up @@ -330,8 +331,6 @@ github.com/fortytw2/leaktest v1.3.0 h1:u8491cBMTQ8ft8aeV+adlcytMZylmA5nnwwkRZjI8
github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g=
github.com/franela/goblin v0.0.0-20200105215937-c9ffbefa60db/go.mod h1:7dvUGVsVBjqR7JHJk0brhHOZYGmfBYOrK0ZhYMEtBr4=
github.com/franela/goreq v0.0.0-20171204163338-bcd34c9993f8/go.mod h1:ZhphrRTfi2rbfLwlschooIH4+wKKDR4Pdxhh+TRoA20=
github.com/frankban/quicktest v1.7.2 h1:2QxQoC1TS09S7fhCPsrvqYdvP1H5M1P1ih5ABm3BTYk=
github.com/frankban/quicktest v1.7.2/go.mod h1:jaStnuzAqU1AJdCO0l53JDCJrVDKcS03DbaAcR7Ks/o=
github.com/fsnotify/fsnotify v1.4.7 h1:IXs+QLmnXW2CcXuY+8Mzv/fWEsPGWxqefPtCP5CnV9I=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4=
Expand Down Expand Up @@ -732,6 +731,7 @@ github.com/julienschmidt/httprouter v1.3.0 h1:U0609e9tgbseu3rBINet9P48AI/D3oJs4d
github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM=
github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes=
github.com/jwilder/encoding v0.0.0-20170811194829-b4e1701a28ef/go.mod h1:Ct9fl0F6iIOGgxJ5npU/IUOhOhqlVrGjyIZc8/MagT0=
github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1e29fT/6vq2aBdFsgNPmy8qMdSay1npru+Sw=
github.com/kardianos/osext v0.0.0-20190222173326-2bc1f35cddc0/go.mod h1:1NbS8ALrpOvjt0rHPNLyCIeMtbizbir8U//inJ+zuB8=
github.com/karrick/godirwalk v1.8.0/go.mod h1:H5KPZjojv4lE+QYImBI8xVtrBRgYrIVsaRPx4tDPEn4=
github.com/karrick/godirwalk v1.10.3/go.mod h1:RoGL9dQei4vP9ilrpETWE8CLOZ1kiN0LhBygSwrAsHA=
Expand Down Expand Up @@ -814,6 +814,7 @@ github.com/mattn/go-runewidth v0.0.2/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzp
github.com/mattn/go-runewidth v0.0.3/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU=
github.com/mattn/go-runewidth v0.0.4/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU=
github.com/mattn/go-runewidth v0.0.6/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
github.com/mattn/go-sqlite3 v1.10.0/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsOqkbpncsNc=
github.com/mattn/go-sqlite3 v1.11.0/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsOqkbpncsNc=
github.com/mattn/go-tty v0.0.0-20180907095812-13ff1204f104/go.mod h1:XPvLUNfbS4fJH25nqRHfWLMa1ONC8Amw+mIA639KxkE=
Expand Down Expand Up @@ -845,6 +846,8 @@ github.com/minio/minio-go/v7 v7.0.2/go.mod h1:dJ80Mv2HeGkYLH1sqS/ksz07ON6csH3S6J
github.com/minio/sha256-simd v0.1.1 h1:5QHSlgo3nt5yKOJrC7W8w7X+NFl8cMPZm96iu8kKUJU=
github.com/minio/sha256-simd v0.1.1/go.mod h1:B5e1o+1/KgNmWrSQK08Y6Z1Vb5pwIktudl0J58iy0KM=
github.com/mitchellh/cli v1.0.0/go.mod h1:hNIlj7HEI86fIcpObd7a0FcrxTWetlwJDGcceTlRvqc=
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw=
github.com/mitchellh/go-homedir v1.0.0 h1:vKb8ShqSby24Yrqr/yDYkuFz8d0WUjys40rvnGC8aR0=
github.com/mitchellh/go-homedir v1.0.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
Expand Down Expand Up @@ -955,10 +958,12 @@ github.com/performancecopilot/speed v3.0.0+incompatible/go.mod h1:/CLtqpZ5gBg1M9
github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU=
github.com/peterh/liner v1.0.1-0.20180619022028-8c1271fcf47f/go.mod h1:xIteQHvHuaLYG9IFj6mSxM0fCKrs34IrEQUhOYuGPHc=
github.com/philhofer/fwd v1.0.0/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU=
github.com/pierrec/cmdflag v0.0.2/go.mod h1:a3zKGZ3cdQUfxjd0RGMLZr8xI3nvpJOB+m6o/1X5BmU=
github.com/pierrec/lz4 v1.0.2-0.20190131084431-473cd7ce01a1/go.mod h1:3/3N9NVKO0jef7pBehbT1qWhCMrIgbYNnFAZCqQ5LRc=
github.com/pierrec/lz4 v2.0.5+incompatible h1:2xWsjqPFWcplujydGg4WmhC/6fZqK42wMM8aXeqhl0I=
github.com/pierrec/lz4 v2.0.5+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY=
github.com/pierrec/lz4 v2.5.3-0.20200429092203-e876bbd321b3+incompatible h1:wPraQD8xUZ14zNJcKn9cz/+n3r6H2NklrGqq7J+c5qY=
github.com/pierrec/lz4 v2.5.3-0.20200429092203-e876bbd321b3+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY=
github.com/pierrec/lz4/v4 v4.0.2-0.20200813132121-22f5d580d5c4 h1:qId52nKbgRXnPooGimqIBgQCECmjgQedP8YsrOFPOOo=
github.com/pierrec/lz4/v4 v4.0.2-0.20200813132121-22f5d580d5c4/go.mod h1:vvUajMAuienWCEdMnA5Zb5mp0VIa9M8VvKcVEOkoAh8=
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
Expand Down Expand Up @@ -1056,6 +1061,7 @@ github.com/samuel/go-zookeeper v0.0.0-20200724154423-2164a8ac840e/go.mod h1:gi+0
github.com/santhosh-tekuri/jsonschema v1.2.4/go.mod h1:TEAUOeZSmIxTTuHatJzrvARHiuO9LYd+cIxzgEHCQI4=
github.com/satori/go.uuid v1.2.0 h1:0uYX9dsZ2yD7q2RtLRtPSdGDWzjeM3TbMJP9utgA0ww=
github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0=
github.com/schollz/progressbar/v3 v3.3.4/go.mod h1:Rp5lZwpgtYmlvmGo1FyDwXMqagyRBQYSDwzlP9QDu84=
github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529 h1:nn5Wsu0esKSJiIVhscUtVbo7ada43DJhG55ua/hjS5I=
github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc=
github.com/segmentio/fasthash v0.0.0-20180216231524-a72b379d632e/go.mod h1:tm/wZFQ8e24NYaBGIlnO2WGCAi67re4HHuOm0sftE/M=
Expand Down Expand Up @@ -1338,6 +1344,7 @@ golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLL
golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200602114024-627f9648deb9 h1:pNX+40auqi2JqRfOP1akLGtYcn15TUbkhwuCO3foqqM=
golang.org/x/net v0.0.0-20200602114024-627f9648deb9/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20200707034311-ab3426394381 h1:VXak5I6aEWmAXeQjA+QSZzlgNrpq9mjcfDemuexIKsU=
Expand Down
13 changes: 0 additions & 13 deletions pkg/chunkenc/hash_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package chunkenc

import (
"hash/fnv"
"hash/maphash"
"testing"

"github.com/cespare/xxhash/v2"
Expand Down Expand Up @@ -40,18 +39,6 @@ func Benchmark_xxhash(b *testing.B) {
}
}

func Benchmark_hashmap(b *testing.B) {
// I discarded hashmap/map as it will compute different value on different binary for the same entry
var h maphash.Hash
for n := 0; n < b.N; n++ {
for i := 0; i < len(testdata.LogsBytes); i++ {
h.SetSeed(maphash.MakeSeed())
_, _ = h.Write(testdata.LogsBytes[i])
res = h.Sum64()
}
}
}

func Test_xxhash_integrity(t *testing.T) {
data := []uint64{}

Expand Down
9 changes: 5 additions & 4 deletions pkg/chunkenc/interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,6 @@ const (
EncDumb
EncLZ4_64k
EncSnappy

// Added for testing.
EncLZ4_256k
EncLZ4_1M
EncLZ4_4M
Expand All @@ -44,6 +42,9 @@ var supportedEncoding = []Encoding{
EncGZIP,
EncLZ4_64k,
EncSnappy,
EncLZ4_256k,
EncLZ4_1M,
EncLZ4_4M,
}

func (e Encoding) String() string {
Expand All @@ -55,13 +56,13 @@ func (e Encoding) String() string {
case EncDumb:
return "dumb"
case EncLZ4_64k:
return "lz4"
return "lz4-64k"
case EncLZ4_256k:
return "lz4-256k"
case EncLZ4_1M:
return "lz4-1M"
case EncLZ4_4M:
return "lz4-4M"
return "lz4"
case EncSnappy:
return "snappy"
default:
Expand Down
4 changes: 2 additions & 2 deletions pkg/chunkenc/memchunk_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -583,7 +583,7 @@ func BenchmarkWrite(b *testing.B) {
func BenchmarkRead(b *testing.B) {
for _, enc := range testEncoding {
b.Run(enc.String(), func(b *testing.B) {
chunks, size := generateData(enc)
chunks, size := generateData(enc, 5)
b.ResetTimer()
bytesRead := uint64(0)
now := time.Now()
Expand Down Expand Up @@ -631,7 +631,7 @@ func BenchmarkBackwardIterator(b *testing.B) {
func TestGenerateDataSize(t *testing.T) {
for _, enc := range testEncoding {
t.Run(enc.String(), func(t *testing.T) {
chunks, size := generateData(enc)
chunks, size := generateData(enc, 50)

bytesRead := uint64(0)
for _, c := range chunks {
Expand Down
79 changes: 26 additions & 53 deletions pkg/chunkenc/pool.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import (

"github.com/golang/snappy"
"github.com/klauspost/compress/gzip"
"github.com/pierrec/lz4"
"github.com/pierrec/lz4/v4"
"github.com/prometheus/prometheus/pkg/pool"
)

Expand Down Expand Up @@ -135,73 +135,43 @@ func (pool *GzipPool) PutWriter(writer io.WriteCloser) {
type LZ4Pool struct {
readers sync.Pool
writers sync.Pool
bufferSize int // available values: 1<<16 (64k), 1<<18 (256k), 1<<20 (1M), 1<<22 (4M). Defaults to 4MB, if not set.
}

// lz4Reader is simple wrapper around *lz4.Reader, which remembers max used block size,
// as reported by this reader. It is used to determine whether we want to reuse it,
// or throw away and garbage-collect.
type lz4Reader struct {
r *lz4.Reader
maxBlockSize int
}

func (l *lz4Reader) Read(p []byte) (n int, err error) {
return l.r.Read(p)
}

func (l *lz4Reader) Reset(src io.Reader) {
l.r.Reset(src)
}

func (l *lz4Reader) onBlockDone(_ int) {
// remember max block size used.
if l.r.BlockMaxSize > l.maxBlockSize {
l.maxBlockSize = l.r.BlockMaxSize
}
}

func newLz4Reader(src io.Reader) *lz4Reader {
lz4r := lz4.NewReader(src)
r := &lz4Reader{r: lz4r}
lz4r.OnBlockDone = r.onBlockDone
return r
bufferSize uint32 // available values: 1<<16 (64k), 1<<18 (256k), 1<<20 (1M), 1<<22 (4M). Defaults to 4MB, if not set.
}

// GetReader gets or creates a new CompressionReader and reset it to read from src
func (pool *LZ4Pool) GetReader(src io.Reader) io.Reader {
if r := pool.readers.Get(); r != nil {
reader := r.(*lz4Reader)
reader.Reset(src)
return reader
var r *lz4.Reader
if pooled := pool.readers.Get(); pooled != nil {
r = pooled.(*lz4.Reader)
r.Reset(src)
} else {
r = lz4.NewReader(src)
}
// no need to set buffer size here. Reader uses buffer size based on
// LZ4 header that it is reading.
return newLz4Reader(src)
return r
}

// PutReader places back in the pool a CompressionReader
func (pool *LZ4Pool) PutReader(reader io.Reader) {
r := reader.(*lz4Reader)
if r.maxBlockSize > pool.bufferSize {
// Readers base their buffer size based on headers from LZ4 stream.
// If this reader uses bigger buffer than what we use currently, don't pool it.
// Reading from a couple of chunks that used big buffer sizes could otherwise quickly lead
// to high pooled memory usage.
return
}
pool.readers.Put(reader)
}

// GetWriter gets or creates a new CompressionWriter and reset it to write to dst
func (pool *LZ4Pool) GetWriter(dst io.Writer) io.WriteCloser {
if w := pool.writers.Get(); w != nil {
writer := w.(*lz4.Writer)
writer.Reset(dst)
return writer
var w *lz4.Writer
if fromPool := pool.writers.Get(); fromPool != nil {
w = fromPool.(*lz4.Writer)
w.Reset(dst)
} else {
w = lz4.NewWriter(dst)
}
err := w.Apply(
lz4.ChecksumOption(false),
lz4.BlockSizeOption(lz4.BlockSize(pool.bufferSize)),
lz4.CompressionLevelOption(lz4.Fast),
)
if err != nil {
panic(err)
}
w := lz4.NewWriter(dst)
w.BlockMaxSize = pool.bufferSize
return w
}

Expand Down Expand Up @@ -277,6 +247,9 @@ type BufioReaderPool struct {
// Get returns a bufio.Reader which reads from r. The buffer size is that of the pool.
func (bufPool *BufioReaderPool) Get(r io.Reader) *bufio.Reader {
buf := bufPool.pool.Get().(*bufio.Reader)
if buf == nil {
return bufio.NewReaderSize(r, 4*1024)
}
buf.Reset(r)
return buf
}
Expand Down
4 changes: 2 additions & 2 deletions pkg/chunkenc/util_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@ func logprotoEntry(ts int64, line string) *logproto.Entry {
}
}

func generateData(enc Encoding) ([]Chunk, uint64) {
func generateData(enc Encoding, chunksCount int) ([]Chunk, uint64) {
chunks := []Chunk{}
i := int64(0)
size := uint64(0)

for n := 0; n < 50; n++ {
for n := 0; n < chunksCount; n++ {
entry := logprotoEntry(0, testdata.LogString(0))
c := NewMemChunk(enc, testBlockSize, testTargetSize)
for c.SpaceFor(entry) {
Expand Down
2 changes: 1 addition & 1 deletion pkg/storage/hack/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ func fillStore() error {
labelsBuilder.Set(labels.MetricName, "logs")
metric := labelsBuilder.Labels()
fp := client.FastFingerprint(lbs)
chunkEnc := chunkenc.NewMemChunk(chunkenc.EncLZ4_64k, 262144, 1572864)
chunkEnc := chunkenc.NewMemChunk(chunkenc.EncLZ4_4M, 262144, 1572864)
for ts := start.UnixNano(); ts < start.UnixNano()+time.Hour.Nanoseconds(); ts = ts + time.Millisecond.Nanoseconds() {
entry := &logproto.Entry{
Timestamp: time.Unix(0, ts),
Expand Down
23 changes: 0 additions & 23 deletions vendor/github.com/pierrec/lz4/debug.go

This file was deleted.

7 changes: 0 additions & 7 deletions vendor/github.com/pierrec/lz4/debug_stub.go

This file was deleted.

30 changes: 0 additions & 30 deletions vendor/github.com/pierrec/lz4/errors.go

This file was deleted.

Loading

0 comments on commit 6500f82

Please sign in to comment.