Disable compression and set ZSTD Compression Level to 1 (#1191)
This PR:
- Disables compression. By default, badger now does not use any compression.
- Sets the default ZSTD compression level to 1.
    Level 15 is too slow for any practical use of badger.

```
no_compression-16              10	 502848865 ns/op	 165.46 MB/s
zstd_compression/level_1-16     7	 739037966 ns/op	 112.58 MB/s
zstd_compression/level_3-16     7	 756950250 ns/op	 109.91 MB/s
zstd_compression/level_15-16    1	11135686219 ns/op	   7.47 MB/s
```
Ibrahim Jarif authored Jan 20, 2020
1 parent 0acb3f6 commit c3333a5
Showing 2 changed files with 59 additions and 23 deletions.
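
For users of the library, the snippet below is a minimal sketch (not part of this commit) of how the new defaults surface and how to opt back into ZSTD at the recommended level via the existing WithCompression and WithZSTDCompressionLevel setters; the path and error handling are illustrative.

```go
package main

import (
	"log"

	badger "github.com/dgraph-io/badger/v2"
	"github.com/dgraph-io/badger/v2/options"
)

func main() {
	// With this commit, DefaultOptions uses options.None for Compression and
	// a ZSTDCompressionLevel of 1, so the database below runs uncompressed.
	opts := badger.DefaultOptions("/tmp/badger")

	// Callers who want compression opt in explicitly; level 1 is the
	// recommended ZSTD level per the benchmarks above.
	opts = opts.WithCompression(options.ZSTD).WithZSTDCompressionLevel(1)

	db, err := badger.Open(opts)
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()
}
```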
40 changes: 24 additions & 16 deletions options.go
@@ -21,7 +21,6 @@ import (

"github.com/dgraph-io/badger/v2/options"
"github.com/dgraph-io/badger/v2/table"
"github.com/dgraph-io/badger/v2/y"
)

// Note: If you add a new option X make sure you also add a WithX method on Options.
@@ -102,11 +101,6 @@ type Options struct {
// DefaultOptions sets a list of recommended options for good performance.
// Feel free to modify these to suit your needs with the WithX methods.
func DefaultOptions(path string) Options {
defaultCompression := options.ZSTD
// Use snappy as default compression algorithm if badger is built without CGO.
if !y.CgoEnabled {
defaultCompression = options.Snappy
}
return Options{
Dir: path,
ValueDir: path,
@@ -129,16 +123,19 @@ func DefaultOptions(path string) Options {
CompactL0OnClose: true,
KeepL0InMemory: true,
VerifyValueChecksum: false,
Compression: defaultCompression,
Compression: options.None,
MaxCacheSize: 1 << 30, // 1 GB
// Benchmarking compression level against performance showed that level 15 gives
// the best speed vs ratio tradeoff.
// For a data size of 4KB we get
// Level: 3 Ratio: 2.72 Time: 24112 n/s
// Level: 10 Ratio: 2.95 Time: 75655 n/s
// Level: 15 Ratio: 4.38 Time: 239042 n/s
// See https://github.com/dgraph-io/badger/pull/1111#issue-338120757
ZSTDCompressionLevel: 15,
// The following benchmarks were done on a 4 KB block size (the default block size). The
// compression ratio is supposed to increase with the compression level, but since the
// input to the compression algorithm is small (4 KB), we don't get a significant benefit
// at level 3.
// no_compression-16 10 502848865 ns/op 165.46 MB/s -
// zstd_compression/level_1-16 7 739037966 ns/op 112.58 MB/s 2.93
// zstd_compression/level_3-16 7 756950250 ns/op 109.91 MB/s 2.72
// zstd_compression/level_15-16 1 11135686219 ns/op 7.47 MB/s 4.38
// The benchmark code can be found in the table/builder_test.go file.
ZSTDCompressionLevel: 1,

// Nothing to read/write value log using standard File I/O
// MemoryMap to mmap() the value log files
// (2^30 - 1)*2 when mmapping < 2^31 - 1, max int32.
@@ -561,7 +558,18 @@ func (opt Options) WithInMemory(b bool) Options {
// The ZSTD compression algorithm supports 20 compression levels. The higher the compression
// level, the better the compression ratio but the lower the performance.
// The default value of ZSTDCompressionLevel is 15.
// We recommend using ZSTD compression level 1. Any level higher than 1 seems to
// deteriorate badger's performance.
// The following benchmarks were done on a 4 KB block size (the default block size). The
// compression ratio is supposed to increase with the compression level, but since the input
// to the compression algorithm is small (4 KB), we don't get a significant benefit at level 3.
// It is advised to write your own benchmarks before choosing a compression algorithm or level.
//
// no_compression-16 10 502848865 ns/op 165.46 MB/s -
// zstd_compression/level_1-16 7 739037966 ns/op 112.58 MB/s 2.93
// zstd_compression/level_3-16 7 756950250 ns/op 109.91 MB/s 2.72
// zstd_compression/level_15-16 1 11135686219 ns/op 7.47 MB/s 4.38
// The benchmark code can be found in the table/builder_test.go file.
func (opt Options) WithZSTDCompressionLevel(cLevel int) Options {
opt.ZSTDCompressionLevel = cLevel
return opt
42 changes: 35 additions & 7 deletions table/builder_test.go
@@ -124,14 +124,42 @@ func BenchmarkBuilder(b *testing.B) {
vs := y.ValueStruct{Value: []byte(val)}

keysCount := 1300000 // This number of entries consumes ~64MB of memory.
for i := 0; i < b.N; i++ {
opts := Options{BlockSize: 4 * 1024, BloomFalsePositive: 0.01}
builder := NewTableBuilder(opts)

for i := 0; i < keysCount; i++ {
builder.Add(key(i), vs, 0)
}
bench := func(b *testing.B, opt *Options) {
// KeyCount * (keySize + ValSize)
b.SetBytes(int64(keysCount) * (32 + 32))
for i := 0; i < b.N; i++ {
opt.BlockSize = 4 * 1024
opt.BloomFalsePositive = 0.01
builder := NewTableBuilder(*opt)

for i := 0; i < keysCount; i++ {
builder.Add(key(i), vs, 0)
}

_ = builder.Finish()
_ = builder.Finish()
}
}

b.Run("no compression", func(b *testing.B) {
var opt Options
opt.Compression = options.None
bench(b, &opt)
})
b.Run("zstd compression", func(b *testing.B) {
var opt Options
opt.Compression = options.ZSTD
b.Run("level 1", func(b *testing.B) {
opt.ZSTDCompressionLevel = 1
bench(b, &opt)
})
b.Run("level 3", func(b *testing.B) {
opt.ZSTDCompressionLevel = 3
bench(b, &opt)
})
b.Run("level 15", func(b *testing.B) {
opt.ZSTDCompressionLevel = 15
bench(b, &opt)
})
})
}
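
The numbers quoted in the commit message come from this benchmark. To reproduce them locally, a standard Go benchmark invocation from the repository root (not part of this commit) should work:

```
go test -run '^$' -bench BenchmarkBuilder ./table/
```

Exact timings will vary with hardware and the ZSTD build in use, but the relative ordering of the compression levels should hold.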
