From c3333a5a830e29133f3b7d63ecf44b54d01e89b2 Mon Sep 17 00:00:00 2001 From: Ibrahim Jarif Date: Mon, 20 Jan 2020 19:54:13 +0530 Subject: [PATCH] Disable compression and set ZSTD Compression Level to 1 (#1191) This PR - Disables compression. By default, badger does not use any compression. - Set default ZSTD compression level to 1 Level 15 is very slow for any practical use of badger. ``` no_compression-16 10 502848865 ns/op 165.46 MB/s zstd_compression/level_1-16 7 739037966 ns/op 112.58 MB/s zstd_compression/level_3-16 7 756950250 ns/op 109.91 MB/s zstd_compression/level_15-16 1 11135686219 ns/op 7.47 MB/s ``` --- options.go | 40 ++++++++++++++++++++++++---------------- table/builder_test.go | 42 +++++++++++++++++++++++++++++++++++------- 2 files changed, 59 insertions(+), 23 deletions(-) diff --git a/options.go b/options.go index f5d795d69..4fbe09199 100644 --- a/options.go +++ b/options.go @@ -21,7 +21,6 @@ import ( "github.com/dgraph-io/badger/v2/options" "github.com/dgraph-io/badger/v2/table" - "github.com/dgraph-io/badger/v2/y" ) // Note: If you add a new option X make sure you also add a WithX method on Options. @@ -102,11 +101,6 @@ type Options struct { // DefaultOptions sets a list of recommended options for good performance. // Feel free to modify these to suit your needs with the WithX methods. func DefaultOptions(path string) Options { - defaultCompression := options.ZSTD - // Use snappy as default compression algorithm if badger is built without CGO. - if !y.CgoEnabled { - defaultCompression = options.Snappy - } return Options{ Dir: path, ValueDir: path, @@ -129,16 +123,19 @@ func DefaultOptions(path string) Options { CompactL0OnClose: true, KeepL0InMemory: true, VerifyValueChecksum: false, - Compression: defaultCompression, + Compression: options.None, MaxCacheSize: 1 << 30, // 1 GB - // Benchmarking compression level against performance showed that level 15 gives - // the best speed vs ratio tradeoff. 
- // For a data size of 4KB we get - // Level: 3 Ratio: 2.72 Time: 24112 n/s - // Level: 10 Ratio: 2.95 Time: 75655 n/s - // Level: 15 Ratio: 4.38 Time: 239042 n/s - // See https://github.com/dgraph-io/badger/pull/1111#issue-338120757 - ZSTDCompressionLevel: 15, + // The following benchmarks were done on a 4 KB block size (default block size). The + // compression ratio is supposed to increase with increasing compression level but since the + // input for compression algorithm is small (4 KB), we don't get significant benefit at + // level 3. + // no_compression-16 10 502848865 ns/op 165.46 MB/s - + // zstd_compression/level_1-16 7 739037966 ns/op 112.58 MB/s 2.93 + // zstd_compression/level_3-16 7 756950250 ns/op 109.91 MB/s 2.72 + // zstd_compression/level_15-16 1 11135686219 ns/op 7.47 MB/s 4.38 + // Benchmark code can be found in table/builder_test.go file + ZSTDCompressionLevel: 1, + // Nothing to read/write value log using standard File I/O // MemoryMap to mmap() the value log files // (2^30 - 1)*2 when mmapping < 2^31 - 1, max int32. @@ -561,7 +558,18 @@ func (opt Options) WithInMemory(b bool) Options { // The ZSTD compression algorithm supports 20 compression levels. The higher the compression // level, the better is the compression ratio but lower is the performance. Lower levels // have better performance and higher levels have better compression ratios. -// The default value of ZSTDCompressionLevel is 15. +// We recommend using ZSTD compression level 1. Any level higher than 1 seems to +// deteriorate badger's performance. +// The following benchmarks were done on a 4 KB block size (default block size). The compression ratio is +// supposed to increase with increasing compression level but since the input for compression +// algorithm is small (4 KB), we don't get significant benefit at level 3. It is advised to write +// your own benchmarks before choosing a compression algorithm or level. 
+// +// no_compression-16 10 502848865 ns/op 165.46 MB/s - +// zstd_compression/level_1-16 7 739037966 ns/op 112.58 MB/s 2.93 +// zstd_compression/level_3-16 7 756950250 ns/op 109.91 MB/s 2.72 +// zstd_compression/level_15-16 1 11135686219 ns/op 7.47 MB/s 4.38 +// Benchmark code can be found in table/builder_test.go file func (opt Options) WithZSTDCompressionLevel(cLevel int) Options { opt.ZSTDCompressionLevel = cLevel return opt diff --git a/table/builder_test.go b/table/builder_test.go index 3af2ce358..e73e5e88f 100644 --- a/table/builder_test.go +++ b/table/builder_test.go @@ -124,14 +124,42 @@ func BenchmarkBuilder(b *testing.B) { vs := y.ValueStruct{Value: []byte(val)} keysCount := 1300000 // This number of entries consumes ~64MB of memory. - for i := 0; i < b.N; i++ { - opts := Options{BlockSize: 4 * 1024, BloomFalsePositive: 0.01} - builder := NewTableBuilder(opts) - for i := 0; i < keysCount; i++ { - builder.Add(key(i), vs, 0) - } + bench := func(b *testing.B, opt *Options) { + // KeyCount * (keySize + ValSize) + b.SetBytes(int64(keysCount) * (32 + 32)) + for i := 0; i < b.N; i++ { + opt.BlockSize = 4 * 1024 + opt.BloomFalsePositive = 0.01 + builder := NewTableBuilder(*opt) + + for i := 0; i < keysCount; i++ { + builder.Add(key(i), vs, 0) + } - _ = builder.Finish() + _ = builder.Finish() + } } + + b.Run("no compression", func(b *testing.B) { + var opt Options + opt.Compression = options.None + bench(b, &opt) + }) + b.Run("zstd compression", func(b *testing.B) { + var opt Options + opt.Compression = options.ZSTD + b.Run("level 1", func(b *testing.B) { + opt.ZSTDCompressionLevel = 1 + bench(b, &opt) + }) + b.Run("level 3", func(b *testing.B) { + opt.ZSTDCompressionLevel = 3 + bench(b, &opt) + }) + b.Run("level 15", func(b *testing.B) { + opt.ZSTDCompressionLevel = 15 + bench(b, &opt) + }) + }) }