From c568ec8799c1855193a57cb43306ddea2ec3f9ff Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Tue, 4 Jun 2019 10:17:24 -0700 Subject: [PATCH] Fix merging range tombstone covering put during flush/compaction (#5406) Summary: Flush/compaction use `MergeUntil` which has a special code path to handle a merge ending with a non-`Merge` point key. In particular if that key is a `Put` we forgot to check whether it is covered by a range tombstone. If it is covered then we must not include it in the following call to `TimedFullMerge`. Fixes #5392. Pull Request resolved: https://github.com/facebook/rocksdb/pull/5406 Differential Revision: D15611144 Pulled By: sagar0 fbshipit-source-id: ba6a7863ca2d043f591de78fd0c4f4561f0c500e --- HISTORY.md | 3 +++ db/db_range_del_test.cc | 24 ++++++++++++++++++++++++ db/merge_helper.cc | 10 +++++++++- 3 files changed, 36 insertions(+), 1 deletion(-) diff --git a/HISTORY.md b/HISTORY.md index f0bc4c1a62c..2e5ad5626a7 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -4,6 +4,9 @@ * Add an option `strict_bytes_per_sync` that causes a file-writing thread to block rather than exceed the limit on bytes pending writeback specified by `bytes_per_sync` or `wal_bytes_per_sync`. * When reading from option file/string/map, customized envs can be filled according to object registry. +### Bug Fixes +* Fix flush's/compaction's merge processing logic which allowed `Put`s covered by range tombstones to reappear. Note `Put`s may exist even if the user only ever called `Merge()` due to an internal conversion during compaction to the bottommost level. + # 5.17.2 (10/24/2018) ### Bug Fixes * Fix the bug that WriteBatchWithIndex's SeekForPrev() doesn't see the entries with the same key. diff --git a/db/db_range_del_test.cc b/db/db_range_del_test.cc index aad9dc3bdb5..02b759360d0 100644 --- a/db/db_range_del_test.cc +++ b/db/db_range_del_test.cc @@ -488,6 +488,30 @@ TEST_F(DBRangeDelTest, CompactionRemovesCoveredMergeOperands) { ASSERT_EQ(expected, actual); } +TEST_F(DBRangeDelTest, PutDeleteRangeMergeFlush) { + // Test the sequence of operations: (1) Put, (2) DeleteRange, (3) Merge, (4) + // Flush. The `CompactionIterator` previously had a bug where we forgot to + // check for covering range tombstones when processing the (1) Put, causing + // it to reappear after the flush. + Options opts = CurrentOptions(); + opts.merge_operator = MergeOperators::CreateUInt64AddOperator(); + Reopen(opts); + + std::string val; + PutFixed64(&val, 1); + ASSERT_OK(db_->Put(WriteOptions(), "key", val)); + ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), + "key", "key_")); + ASSERT_OK(db_->Merge(WriteOptions(), "key", val)); + ASSERT_OK(db_->Flush(FlushOptions())); + + ReadOptions read_opts; + std::string expected, actual; + ASSERT_OK(db_->Get(read_opts, "key", &actual)); + PutFixed64(&expected, 1); + ASSERT_EQ(expected, actual); +} + // NumTableFilesAtLevel() is not supported in ROCKSDB_LITE #ifndef ROCKSDB_LITE TEST_F(DBRangeDelTest, ObsoleteTombstoneCleanup) { diff --git a/db/merge_helper.cc b/db/merge_helper.cc index dc6baa96302..c778edbb687 100644 --- a/db/merge_helper.cc +++ b/db/merge_helper.cc @@ -192,7 +192,15 @@ Status MergeHelper::MergeUntil(InternalIterator* iter, // want. Also if we're in compaction and it's a put, it would be nice to // run compaction filter on it. const Slice val = iter->value(); - const Slice* val_ptr = (kTypeValue == ikey.type) ? &val : nullptr; + const Slice* val_ptr; + if (kTypeValue == ikey.type && + (range_del_agg == nullptr || + !range_del_agg->ShouldDelete( + ikey, RangeDelPositioningMode::kForwardTraversal))) { + val_ptr = &val; + } else { + val_ptr = nullptr; + } std::string merge_result; s = TimedFullMerge(user_merge_operator_, ikey.user_key, val_ptr, merge_context_.GetOperands(), &merge_result, logger_,