From 397e2347d0cc1ed89b70272c84f4e0dad8075529 Mon Sep 17 00:00:00 2001 From: Francis Lavoie Date: Wed, 18 Aug 2021 15:12:55 -0400 Subject: [PATCH] feat(zstd): backport replacement of DataDog's zstd with Klauspost's zstd --- README.md | 1 - db.go | 4 ---- go.mod | 4 ++-- go.sum | 8 +++---- options.go | 2 ++ y/y.go | 5 ---- y/zstd.go | 64 +++++++++++++++++++++++++++++++++++++++++++++++++ y/zstd_cgo.go | 36 ---------------------------- y/zstd_nocgo.go | 32 ------------------------- 9 files changed, 72 insertions(+), 84 deletions(-) create mode 100644 y/zstd.go delete mode 100644 y/zstd_cgo.go delete mode 100644 y/zstd_nocgo.go diff --git a/README.md b/README.md index 2a220915b..f92b82a2b 100644 --- a/README.md +++ b/README.md @@ -74,7 +74,6 @@ $ go get github.com/dgraph-io/badger/v2 This will retrieve the library and install the `badger` command line utility into your `$GOBIN` path. -##### Note: Badger does not directly use CGO but it relies on https://github.com/DataDog/zstd for compression and it requires gcc/cgo. If you wish to use badger without gcc/cgo, you can run `CGO_ENABLED=0 go get github.com/dgraph-io/badger/...` which will download badger without the support for ZSTD compression algorithm. #### Choosing a version diff --git a/db.go b/db.go index 9a3be1ee4..cdb1f4900 100644 --- a/db.go +++ b/db.go @@ -226,10 +226,6 @@ func Open(opt Options) (db *DB, err error) { return nil, ErrInvalidLoadingMode } - // Return error if badger is built without cgo and compression is set to ZSTD. - if opt.Compression == options.ZSTD && !y.CgoEnabled { - return nil, y.ErrZstdCgo - } // Keep L0 in memory if either KeepL0InMemory is set or if InMemory is set. 
opt.KeepL0InMemory = opt.KeepL0InMemory || opt.InMemory diff --git a/go.mod b/go.mod index 6cb85b77c..800c406b5 100644 --- a/go.mod +++ b/go.mod @@ -3,13 +3,13 @@ module github.com/dgraph-io/badger/v2 go 1.12 require ( - github.com/DataDog/zstd v1.4.1 github.com/cespare/xxhash v1.1.0 github.com/dgraph-io/ristretto v0.0.3-0.20200630154024-f66de99634de github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2 github.com/dustin/go-humanize v1.0.0 github.com/golang/protobuf v1.3.1 - github.com/golang/snappy v0.0.1 + github.com/golang/snappy v0.0.3 + github.com/klauspost/compress v1.12.3 github.com/kr/pretty v0.1.0 // indirect github.com/pkg/errors v0.8.1 github.com/spaolacci/murmur3 v1.1.0 // indirect diff --git a/go.sum b/go.sum index a4aa207f9..01b1c2599 100644 --- a/go.sum +++ b/go.sum @@ -1,6 +1,4 @@ github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= -github.com/DataDog/zstd v1.4.1 h1:3oxKN3wbHibqx897utPC2LTQU4J+IHWWJO+glkAkpFM= -github.com/DataDog/zstd v1.4.1/go.mod h1:1jcaCB/ufaK+sKp1NBhlGmpz41jOoPQ35bpF36t7BBo= github.com/OneOfOne/xxhash v1.2.2 h1:KMrpdQIwFcEqXDklaen+P1axHaj9BSKzvpUUfnHldSE= github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= @@ -22,11 +20,13 @@ github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25Kn github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/golang/protobuf v1.3.1 h1:YF8+flBXS5eO826T4nzqPrxfhQThhXl0YzfuUPu4SBg= github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4= -github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/golang/snappy v0.0.3 h1:fHPg5GQYlCeLIPB9BZqMVR5nR9A+IM5zcgeTdjMYmLA= +github.com/golang/snappy 
v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM= github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= +github.com/klauspost/compress v1.12.3 h1:G5AfA94pHPysR56qqrkO2pxEexdDzrpFJ6yt/VqWxVU= +github.com/klauspost/compress v1.12.3/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg= github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= diff --git a/options.go b/options.go index cae5b3770..700c87472 100644 --- a/options.go +++ b/options.go @@ -143,6 +143,7 @@ func DefaultOptions(path string) Options { // compression is ratio supposed to increase with increasing compression level but since the // input for compression algorithm is small (4 KB), we don't get significant benefit at // level 3. + // NOTE: These benchmarks were run with DataDog ZSTD (requires CGO) and are no longer valid. // no_compression-16 10 502848865 ns/op 165.46 MB/s - // zstd_compression/level_1-16 7 739037966 ns/op 112.58 MB/s 2.93 // zstd_compression/level_3-16 7 756950250 ns/op 109.91 MB/s 2.72 @@ -587,6 +588,7 @@ func (opt Options) WithInMemory(b bool) Options { // algorithm is small (4 KB), we don't get significant benefit at level 3. It is advised to write // your own benchmarks before choosing a compression algorithm or level. // +// NOTE: These benchmarks were run with DataDog ZSTD (requires CGO) and are no longer valid. 
// no_compression-16 10 502848865 ns/op 165.46 MB/s - // zstd_compression/level_1-16 7 739037966 ns/op 112.58 MB/s 2.93 // zstd_compression/level_3-16 7 756950250 ns/op 109.91 MB/s 2.72 diff --git a/y/y.go b/y/y.go index 5e6dd5b25..554a413ef 100644 --- a/y/y.go +++ b/y/y.go @@ -36,11 +36,6 @@ var ( // ErrEOF indicates an end of file when trying to read from a memory mapped file // and encountering the end of slice. ErrEOF = errors.New("End of mapped region") - - // ErrZstdCgo indicates that badger was built without cgo but ZSTD - // compression algorithm is being used for compression. ZSTD cannot work - // without CGO. - ErrZstdCgo = errors.New("zstd compression requires building badger with cgo enabled") ) const ( diff --git a/y/zstd.go b/y/zstd.go new file mode 100644 index 000000000..57018680a --- /dev/null +++ b/y/zstd.go @@ -0,0 +1,64 @@ +/* + * Copyright 2019 Dgraph Labs, Inc. and Contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package y + +import ( + "sync" + + "github.com/klauspost/compress/zstd" +) + +var ( + decoder *zstd.Decoder + encoder *zstd.Encoder + + encOnce, decOnce sync.Once +) + +// ZSTDDecompress decompresses a block using ZSTD algorithm. +func ZSTDDecompress(dst, src []byte) ([]byte, error) { + decOnce.Do(func() { + var err error + decoder, err = zstd.NewReader(nil) + Check(err) + }) + return decoder.DecodeAll(src, dst[:0]) +} + +// ZSTDCompress compresses a block using ZSTD algorithm. 
+func ZSTDCompress(dst, src []byte, compressionLevel int) ([]byte, error) { + encOnce.Do(func() { + var err error + level := zstd.EncoderLevelFromZstd(compressionLevel) + encoder, err = zstd.NewWriter(nil, zstd.WithEncoderLevel(level)) + Check(err) + }) + return encoder.EncodeAll(src, dst[:0]), nil +} + +// ZSTDCompressBound returns the worst case size needed for a destination buffer. +// Klauspost ZSTD library does not provide any API for Compression Bound. This +// calculation is based on the DataDog ZSTD library. +// See https://pkg.go.dev/github.com/DataDog/zstd#CompressBound +func ZSTDCompressBound(srcSize int) int { + lowLimit := 128 << 10 // 128 kB + var margin int + if srcSize < lowLimit { + margin = (lowLimit - srcSize) >> 11 + } + return srcSize + (srcSize >> 8) + margin +} diff --git a/y/zstd_cgo.go b/y/zstd_cgo.go deleted file mode 100644 index 083b70aae..000000000 --- a/y/zstd_cgo.go +++ /dev/null @@ -1,36 +0,0 @@ -// +build cgo - -/* - * Copyright 2019 Dgraph Labs, Inc. and Contributors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package y - -import ( - "github.com/DataDog/zstd" -) - -// CgoEnabled is used to check if CGO is enabled while building badger. -const CgoEnabled = true - -// ZSTDDecompress decompresses a block using ZSTD algorithm. -func ZSTDDecompress(dst, src []byte) ([]byte, error) { - return zstd.Decompress(dst, src) -} - -// ZSTDCompress compresses a block using ZSTD algorithm. 
-func ZSTDCompress(dst, src []byte, compressionLevel int) ([]byte, error) { - return zstd.CompressLevel(dst, src, compressionLevel) -} diff --git a/y/zstd_nocgo.go b/y/zstd_nocgo.go deleted file mode 100644 index 9ba9292ec..000000000 --- a/y/zstd_nocgo.go +++ /dev/null @@ -1,32 +0,0 @@ -// +build !cgo - -/* - * Copyright 2019 Dgraph Labs, Inc. and Contributors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package y - -// CgoEnabled is used to check if CGO is enabled while building badger. -const CgoEnabled = false - -// ZSTDDecompress decompresses a block using ZSTD algorithm. -func ZSTDDecompress(dst, src []byte) ([]byte, error) { - return nil, ErrZstdCgo -} - -// ZSTDCompress compresses a block using ZSTD algorithm. -func ZSTDCompress(dst, src []byte, compressionLevel int) ([]byte, error) { - return nil, ErrZstdCgo -}