From e867d29a979715cd4156214308556bc7bf942cd5 Mon Sep 17 00:00:00 2001 From: Manish R Jain Date: Tue, 9 Feb 2021 14:32:01 -0800 Subject: [PATCH] Use Badger's value log threshold of 1MB (#7415). The write amplification from using the value log has been unpredictably high. With this change, we use 1MB as the ValueThreshold to push as many things as possible into the LSM tree. Previously it was 1KB. --- dgraph/cmd/bulk/reduce.go | 1 - go.mod | 2 +- go.sum | 4 ++-- worker/file_handler.go | 2 -- worker/restore.go | 1 - worker/server_state.go | 1 - 6 files changed, 3 insertions(+), 8 deletions(-) diff --git a/dgraph/cmd/bulk/reduce.go b/dgraph/cmd/bulk/reduce.go index 9036ac271cf..6a5487b432e 100644 --- a/dgraph/cmd/bulk/reduce.go +++ b/dgraph/cmd/bulk/reduce.go @@ -132,7 +132,6 @@ func (r *reducer) createBadgerInternal(dir string, compression bool) *badger.DB opt := badger.DefaultOptions(dir). WithSyncWrites(false). - WithValueThreshold(1 << 20 /* 1 KB */). WithEncryptionKey(key). WithBlockCacheSize(r.opt.BlockCacheSize). 
WithIndexCacheSize(r.opt.IndexCacheSize) diff --git a/go.mod b/go.mod index 606deb91cdb..0d2febb7d2d 100644 --- a/go.mod +++ b/go.mod @@ -16,7 +16,7 @@ require ( github.com/OneOfOne/xxhash v1.2.5 // indirect github.com/blevesearch/bleve v1.0.13 github.com/codahale/hdrhistogram v0.0.0-20161010025455-3a0bb77429bd - github.com/dgraph-io/badger/v3 v3.2011.1 + github.com/dgraph-io/badger/v3 v3.2011.2-0.20210210142907-44c9230e5a66 github.com/dgraph-io/dgo/v200 v200.0.0-20200805103119-a3544c464dd6 github.com/dgraph-io/gqlgen v0.13.2 github.com/dgraph-io/gqlparser/v2 v2.1.5 diff --git a/go.sum b/go.sum index 9aacc044e1e..e2b99a34413 100644 --- a/go.sum +++ b/go.sum @@ -116,8 +116,8 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dgraph-io/badger v1.6.0 h1:DshxFxZWXUcO0xX476VJC07Xsr6ZCBVRHKZ93Oh7Evo= github.com/dgraph-io/badger v1.6.0/go.mod h1:zwt7syl517jmP8s94KqSxTlM6IMsdhYy6psNgSztDR4= -github.com/dgraph-io/badger/v3 v3.2011.1 h1:Hmyof0WMEF/QtutX5SQHzIMnJQxb/IrSzhjckV2SD6g= -github.com/dgraph-io/badger/v3 v3.2011.1/go.mod h1:0rLLrQpKVQAL0or/lBLMQznhr6dWWX7h5AKnmnqx268= +github.com/dgraph-io/badger/v3 v3.2011.2-0.20210210142907-44c9230e5a66 h1:k2FNYVVH2tKhcPoX8PDc0RUhCYei/+LebiKZEdFt+Ec= +github.com/dgraph-io/badger/v3 v3.2011.2-0.20210210142907-44c9230e5a66/go.mod h1:0rLLrQpKVQAL0or/lBLMQznhr6dWWX7h5AKnmnqx268= github.com/dgraph-io/dgo/v200 v200.0.0-20200805103119-a3544c464dd6 h1:toHzMCdCUgYsjM0cW9+wafnKFXfp1HizIJUyzihN+vk= github.com/dgraph-io/dgo/v200 v200.0.0-20200805103119-a3544c464dd6/go.mod h1:rHa+h3kI4M8ASOirxyIyNeXBfHFgeskVUum2OrDMN3U= github.com/dgraph-io/gqlgen v0.13.2 h1:TNhndk+eHKj5qE7BenKKSYdSIdOGhLqxR1rCiMso9KM= diff --git a/worker/file_handler.go b/worker/file_handler.go index 433d46602f1..7f72f8784a9 100644 --- a/worker/file_handler.go +++ b/worker/file_handler.go @@ -306,7 +306,6 @@ func (h *fileHandler) 
ExportBackup(backupDir, exportDir, format string, // file reader and verifying the encryption in the backup file. db, err := badger.OpenManaged(badger.DefaultOptions(dir). WithSyncWrites(false). - WithValueThreshold(1 << 10). WithNumVersionsToKeep(math.MaxInt32). WithEncryptionKey(key)) @@ -358,7 +357,6 @@ func (h *fileHandler) ExportBackup(backupDir, exportDir, format string, dir := filepath.Join(tmpDir, fmt.Sprintf("p%d", group)) db, err := badger.OpenManaged(badger.DefaultOptions(dir). WithSyncWrites(false). - WithValueThreshold(1 << 10). WithNumVersionsToKeep(math.MaxInt32). WithEncryptionKey(key)) diff --git a/worker/restore.go b/worker/restore.go index c5d45eddcc9..f3f9a92e136 100644 --- a/worker/restore.go +++ b/worker/restore.go @@ -68,7 +68,6 @@ func RunRestore(pdir, location, backupId string, key x.SensitiveByteSlice, ctype WithCompression(ctype). WithZSTDCompressionLevel(clevel). WithSyncWrites(false). - WithValueThreshold(1 << 10). WithBlockCacheSize(100 * (1 << 20)). WithIndexCacheSize(100 * (1 << 20)). WithNumVersionsToKeep(math.MaxInt32). diff --git a/worker/server_state.go b/worker/server_state.go index 923b63ccc4c..864c31ef8e5 100644 --- a/worker/server_state.go +++ b/worker/server_state.go @@ -107,7 +107,6 @@ func (s *ServerState) initStorage() { // for posting lists, so the cost of sync writes is amortized. x.Check(os.MkdirAll(Config.PostingDir, 0700)) opt := badger.DefaultOptions(Config.PostingDir). - WithValueThreshold(1 << 10 /* 1KB */). WithNumVersionsToKeep(math.MaxInt32). WithBlockCacheSize(Config.PBlockCacheSize). WithIndexCacheSize(Config.PIndexCacheSize)