From 55ee6cd500dc1f6b97eb5b36b80fb0044f04dae6 Mon Sep 17 00:00:00 2001 From: NamanJain8 Date: Wed, 26 May 2021 17:10:46 +0530 Subject: [PATCH] feat(compression): use klauspost zstd when cgo is not enabled --- README.md | 2 +- db.go | 5 ----- go.mod | 3 ++- go.sum | 6 ++++-- options.go | 2 ++ y/y.go | 5 ----- y/zstd_cgo.go | 3 --- y/zstd_nocgo.go | 39 ++++++++++++++++++++++++++++++++++----- 8 files changed, 43 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index e1c0ba168..b8f51cd70 100644 --- a/README.md +++ b/README.md @@ -58,7 +58,7 @@ $ go get github.com/dgraph-io/badger/v3 ``` This will retrieve the library. -##### Note: Badger does not directly use CGO but it relies on https://github.com/DataDog/zstd for compression and it requires gcc/cgo. If you wish to use badger without gcc/cgo, you can run `CGO_ENABLED=0 go get github.com/dgraph-io/badger/v3` which will download badger without the support for ZSTD compression algorithm. +##### Note: Badger does not directly use CGO but it relies on https://github.com/DataDog/zstd for compression and it requires gcc/cgo. If you wish to use badger without gcc/cgo, you can run `CGO_ENABLED=0 go get github.com/dgraph-io/badger/v3` which will download badger with https://github.com/klauspost/compress ZSTD compression that does not require CGO. #### Installing Badger Command Line Tool diff --git a/db.go b/db.go index 31b4f2667..02db7ef76 100644 --- a/db.go +++ b/db.go @@ -172,11 +172,6 @@ func checkAndSetOptions(opt *Options) error { return ErrValueLogSize } - // Return error if badger is built without cgo and compression is set to ZSTD. - if opt.Compression == options.ZSTD && !y.CgoEnabled { - return y.ErrZstdCgo - } - if opt.ReadOnly { // Do not perform compaction in read only mode. 
opt.CompactL0OnClose = false diff --git a/go.mod b/go.mod index 0e2a361e8..36b0f0327 100644 --- a/go.mod +++ b/go.mod @@ -11,9 +11,10 @@ require ( github.com/dustin/go-humanize v1.0.0 github.com/gogo/protobuf v1.3.2 github.com/golang/protobuf v1.3.1 - github.com/golang/snappy v0.0.1 + github.com/golang/snappy v0.0.3 github.com/google/flatbuffers v1.12.0 github.com/google/go-cmp v0.5.4 // indirect + github.com/klauspost/compress v1.12.3 github.com/kr/pretty v0.1.0 // indirect github.com/pkg/errors v0.9.1 github.com/spaolacci/murmur3 v1.1.0 // indirect diff --git a/go.sum b/go.sum index e97ba7656..45d83b606 100644 --- a/go.sum +++ b/go.sum @@ -34,8 +34,8 @@ github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfb github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.1 h1:YF8+flBXS5eO826T4nzqPrxfhQThhXl0YzfuUPu4SBg= github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4= -github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/golang/snappy v0.0.3 h1:fHPg5GQYlCeLIPB9BZqMVR5nR9A+IM5zcgeTdjMYmLA= +github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/flatbuffers v1.12.0 h1:/PtAHvnBY4Kqnx/xCQ3OIV9uYcSFGScBsWI3Oogeh6w= github.com/google/flatbuffers v1.12.0/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= @@ -46,6 +46,8 @@ github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NH github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= 
+github.com/klauspost/compress v1.12.3 h1:G5AfA94pHPysR56qqrkO2pxEexdDzrpFJ6yt/VqWxVU= +github.com/klauspost/compress v1.12.3/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg= github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= diff --git a/options.go b/options.go index 442240777..9bf977e22 100644 --- a/options.go +++ b/options.go @@ -163,6 +163,7 @@ func DefaultOptions(path string) Options { // compression is ratio supposed to increase with increasing compression level but since the // input for compression algorithm is small (4 KB), we don't get significant benefit at // level 3. + // NOTE: The benchmarks are with DataDog ZSTD that requires CGO. // no_compression-16 10 502848865 ns/op 165.46 MB/s - // zstd_compression/level_1-16 7 739037966 ns/op 112.58 MB/s 2.93 // zstd_compression/level_3-16 7 756950250 ns/op 109.91 MB/s 2.72 @@ -729,6 +730,7 @@ func (opt Options) WithInMemory(b bool) Options { // algorithm is small (4 KB), we don't get significant benefit at level 3. It is advised to write // your own benchmarks before choosing a compression algorithm or level. // +// NOTE: The benchmarks are with DataDog ZSTD that requires CGO. // no_compression-16 10 502848865 ns/op 165.46 MB/s - // zstd_compression/level_1-16 7 739037966 ns/op 112.58 MB/s 2.93 // zstd_compression/level_3-16 7 756950250 ns/op 109.91 MB/s 2.72 diff --git a/y/y.go b/y/y.go index 22c8035f9..5947ff784 100644 --- a/y/y.go +++ b/y/y.go @@ -40,11 +40,6 @@ var ( // and encountering the end of slice. ErrEOF = errors.New("ErrEOF: End of file") - // ErrZstdCgo indicates that badger was built without cgo but ZSTD - // compression algorithm is being used for compression. ZSTD cannot work - // without CGO. 
- ErrZstdCgo = errors.New("ErrZstdCgo: zstd compression requires building badger with cgo enabled") - // ErrCommitAfterFinish indicates that write batch commit was called after // finish ErrCommitAfterFinish = errors.New("Batch commit not permitted after finish") diff --git a/y/zstd_cgo.go b/y/zstd_cgo.go index 0ad881226..ddb4850b0 100644 --- a/y/zstd_cgo.go +++ b/y/zstd_cgo.go @@ -22,9 +22,6 @@ import ( "github.com/DataDog/zstd" ) -// CgoEnabled is used to check if CGO is enabled while building badger. -const CgoEnabled = true - // ZSTDDecompress decompresses a block using ZSTD algorithm. func ZSTDDecompress(dst, src []byte) ([]byte, error) { return zstd.Decompress(dst, src) diff --git a/y/zstd_nocgo.go b/y/zstd_nocgo.go index 8f8d7f479..819d6664a 100644 --- a/y/zstd_nocgo.go +++ b/y/zstd_nocgo.go @@ -18,20 +18,49 @@ package y -// CgoEnabled is used to check if CGO is enabled while building badger. -const CgoEnabled = false +import ( + "sync" + + "github.com/klauspost/compress/zstd" +) + +var ( + decoder *zstd.Decoder + encoder *zstd.Encoder + + encOnce, decOnce sync.Once +) // ZSTDDecompress decompresses a block using ZSTD algorithm. func ZSTDDecompress(dst, src []byte) ([]byte, error) { - return nil, ErrZstdCgo + decOnce.Do(func() { + var err error + decoder, err = zstd.NewReader(nil) + Check(err) + }) + return decoder.DecodeAll(src, dst[:0]) } // ZSTDCompress compresses a block using ZSTD algorithm. func ZSTDCompress(dst, src []byte, compressionLevel int) ([]byte, error) { - return nil, ErrZstdCgo + encOnce.Do(func() { // the encoder has its own Once, independent of the decoder's + var err error + level := zstd.EncoderLevelFromZstd(compressionLevel) // only the first call's level is ever used + encoder, err = zstd.NewWriter(nil, zstd.WithEncoderLevel(level)) + Check(err) + }) + return encoder.EncodeAll(src, dst[:0]), nil } // ZSTDCompressBound returns the worst case size needed for a destination buffer. +// Klauspost ZSTD library does not provide any API for Compression Bound. This +// calculation is based on the DataDog ZSTD library. 
+// See https://pkg.go.dev/github.com/DataDog/zstd#CompressBound func ZSTDCompressBound(srcSize int) int { - panic("ZSTD only supported in Cgo.") + lowLimit := 128 << 10 // 128 KiB; inputs smaller than this get an extra margin + var margin int + if srcSize < lowLimit { + margin = (lowLimit - srcSize) >> 11 + } + return srcSize + (srcSize >> 8) + margin }