diff --git a/unreleased_history/bug_fixes/fix_bounds_check_in_BaseDeltaIterator_and_Write(Un)PreparedTxn.md b/unreleased_history/bug_fixes/fix_bounds_check_in_BaseDeltaIterator_and_Write(Un)PreparedTxn.md new file mode 100644 index 00000000000..237351f87f1 --- /dev/null +++ b/unreleased_history/bug_fixes/fix_bounds_check_in_BaseDeltaIterator_and_Write(Un)PreparedTxn.md @@ -0,0 +1 @@ +Add bounds check in WBWIIteratorImpl and make BaseDeltaIterator, WriteUnpreparedTxn and WritePreparedTxn respect the upper bound and lower bound in ReadOption. See 11680. \ No newline at end of file diff --git a/utilities/transactions/transaction_test.cc b/utilities/transactions/transaction_test.cc index 0d2e7ccda4e..d12626ca8c5 100644 --- a/utilities/transactions/transaction_test.cc +++ b/utilities/transactions/transaction_test.cc @@ -78,6 +78,112 @@ INSTANTIATE_TEST_CASE_P( std::make_tuple(false, true, WRITE_PREPARED, kUnorderedWrite, true))); #endif // !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN) +TEST_P(TransactionTest, TestUpperBoundUponDeletion) { + // Reproduction from the original bug report, 11606 + // This test does writes without snapshot validation, and then tries to create + // iterator later, which is unsupported in write unprepared. + if (txn_db_options.write_policy == WRITE_UNPREPARED) { + return; + } + + WriteOptions write_options; + ReadOptions read_options; + Status s; + + Transaction* txn = db->BeginTransaction(write_options); + ASSERT_TRUE(txn); + + // Write some keys in a txn + s = txn->Put("2", "2"); + ASSERT_OK(s); + + s = txn->Put("1", "1"); + ASSERT_OK(s); + + s = txn->Delete("2"); + ASSERT_OK(s); + + read_options.iterate_upper_bound = new Slice("2", 1); + Iterator* iter = txn->GetIterator(read_options); + ASSERT_OK(iter->status()); + iter->SeekToFirst(); + while (iter->Valid()) { + ASSERT_EQ("1", iter->key().ToString()); + iter->Next(); + } + delete iter; + delete txn; + delete read_options.iterate_upper_bound; +} + +TEST_P(TransactionTest, TestTxnRespectBoundsInReadOption) { + if (txn_db_options.write_policy == WRITE_UNPREPARED) { + return; + } + + WriteOptions write_options; + + { + std::unique_ptr txn(db->BeginTransaction(write_options)); + // writes that should be observed by base_iterator_ in BaseDeltaIterator + ASSERT_OK(txn->Put("a", "aa")); + ASSERT_OK(txn->Put("c", "cc")); + ASSERT_OK(txn->Put("e", "ee")); + ASSERT_OK(txn->Put("f", "ff")); + ASSERT_TRUE(txn->Commit().ok()); + } + + std::unique_ptr txn2(db->BeginTransaction(write_options)); + // writes that should be observed by delta_iterator_ in BaseDeltaIterator + ASSERT_OK(txn2->Put("b", "bb")); + ASSERT_OK(txn2->Put("c", "cc")); + ASSERT_OK(txn2->Put("f", "ff")); + + // delta_iterator_: b c f + // base_iterator_: a c e f + // + // given range [c, f) + // assert only {c, e} can be seen + + ReadOptions ro; + ro.iterate_lower_bound = new Slice("c"); + ro.iterate_upper_bound = new Slice("f"); + std::unique_ptr iter(txn2->GetIterator(ro)); + + iter->Seek(Slice("b")); + ASSERT_EQ("c", iter->key()); // lower bound capping + iter->Seek(Slice("f")); + ASSERT_FALSE(iter->Valid()); // out of bound + + iter->SeekForPrev(Slice("f")); + ASSERT_EQ("e", iter->key()); // upper bound capping + iter->SeekForPrev(Slice("b")); + ASSERT_FALSE(iter->Valid()); // out of bound + + // move to the lower bound + iter->SeekToFirst(); + ASSERT_EQ("c", iter->key()); + iter->Prev(); + ASSERT_FALSE(iter->Valid()); + + // move to the upper bound + iter->SeekToLast(); + ASSERT_EQ("e", iter->key()); + iter->Next(); + ASSERT_FALSE(iter->Valid()); + + // reversely walk to the beginning + iter->SeekToLast(); + ASSERT_EQ("e", iter->key()); + iter->Prev(); + ASSERT_EQ("c", iter->key()); + iter->Prev(); + ASSERT_FALSE(iter->Valid()); + + delete ro.iterate_lower_bound; + delete ro.iterate_upper_bound; +} + TEST_P(TransactionTest, DoubleEmptyWrite) { WriteOptions write_options; write_options.sync = true; diff --git a/utilities/transactions/write_prepared_txn.cc b/utilities/transactions/write_prepared_txn.cc index aa5091b9528..58126a47508 100644 --- a/utilities/transactions/write_prepared_txn.cc +++ b/utilities/transactions/write_prepared_txn.cc @@ -123,11 +123,7 @@ Status WritePreparedTxn::GetImpl(const ReadOptions& options, } Iterator* WritePreparedTxn::GetIterator(const ReadOptions& options) { - // Make sure to get iterator from WritePrepareTxnDB, not the root db. - Iterator* db_iter = wpt_db_->NewIterator(options); - assert(db_iter); - - return write_batch_.NewIteratorWithBase(db_iter); + return GetIterator(options, wpt_db_->DefaultColumnFamily()); } Iterator* WritePreparedTxn::GetIterator(const ReadOptions& options, @@ -136,7 +132,7 @@ Iterator* WritePreparedTxn::GetIterator(const ReadOptions& options, Iterator* db_iter = wpt_db_->NewIterator(options, column_family); assert(db_iter); - return write_batch_.NewIteratorWithBase(column_family, db_iter); + return write_batch_.NewIteratorWithBase(column_family, db_iter, &options); } Status WritePreparedTxn::PrepareInternal() { diff --git a/utilities/transactions/write_unprepared_txn.cc b/utilities/transactions/write_unprepared_txn.cc index 4c9c2a3ddbc..c30cf9e1f04 100644 --- a/utilities/transactions/write_unprepared_txn.cc +++ b/utilities/transactions/write_unprepared_txn.cc @@ -1037,7 +1037,8 @@ Iterator* WriteUnpreparedTxn::GetIterator(const ReadOptions& options, Iterator* db_iter = wupt_db_->NewIterator(options, column_family, this); assert(db_iter); - auto iter = write_batch_.NewIteratorWithBase(column_family, db_iter); + auto iter = + write_batch_.NewIteratorWithBase(column_family, db_iter, &options); active_iterators_.push_back(iter); iter->RegisterCleanup(CleanupWriteUnpreparedWBWIIterator, this, iter); return iter; diff --git a/utilities/write_batch_with_index/write_batch_with_index.cc b/utilities/write_batch_with_index/write_batch_with_index.cc index b01f70a692d..bbfc60f9b48 100644 --- a/utilities/write_batch_with_index/write_batch_with_index.cc +++ b/utilities/write_batch_with_index/write_batch_with_index.cc @@ -299,12 +299,20 @@ WBWIIterator* WriteBatchWithIndex::NewIterator( Iterator* WriteBatchWithIndex::NewIteratorWithBase( ColumnFamilyHandle* column_family, Iterator* base_iterator, const ReadOptions* read_options) { - auto wbwiii = - new WBWIIteratorImpl(GetColumnFamilyID(column_family), &(rep->skip_list), - &rep->write_batch, &rep->comparator); + WBWIIteratorImpl* wbwiii; + if (read_options != nullptr) { + wbwiii = new WBWIIteratorImpl( + GetColumnFamilyID(column_family), &(rep->skip_list), &rep->write_batch, + &rep->comparator, read_options->iterate_lower_bound, + read_options->iterate_upper_bound); + } else { + wbwiii = new WBWIIteratorImpl(GetColumnFamilyID(column_family), + &(rep->skip_list), &rep->write_batch, + &rep->comparator); + } + return new BaseDeltaIterator(column_family, base_iterator, wbwiii, - GetColumnFamilyUserComparator(column_family), - read_options); + GetColumnFamilyUserComparator(column_family)); } Iterator* WriteBatchWithIndex::NewIteratorWithBase(Iterator* base_iterator) { diff --git a/utilities/write_batch_with_index/write_batch_with_index_internal.cc b/utilities/write_batch_with_index/write_batch_with_index_internal.cc index 11feb5a7e8f..bedd5934d5b 100644 --- a/utilities/write_batch_with_index/write_batch_with_index_internal.cc +++ b/utilities/write_batch_with_index/write_batch_with_index_internal.cc @@ -20,8 +20,7 @@ namespace ROCKSDB_NAMESPACE { BaseDeltaIterator::BaseDeltaIterator(ColumnFamilyHandle* column_family, Iterator* base_iterator, WBWIIteratorImpl* delta_iterator, - const Comparator* comparator, - const ReadOptions* read_options) + const Comparator* comparator) : forward_(true), current_at_base_(true), equal_keys_(false), @@ -29,9 +28,7 @@ BaseDeltaIterator::BaseDeltaIterator(ColumnFamilyHandle* column_family, column_family_(column_family), base_iterator_(base_iterator), delta_iterator_(delta_iterator), - comparator_(comparator), - iterate_upper_bound_(read_options ? read_options->iterate_upper_bound - : nullptr) { + comparator_(comparator) { assert(base_iterator_); assert(delta_iterator_); assert(comparator_); @@ -332,14 +329,6 @@ void BaseDeltaIterator::UpdateCurrent() { // Finished return; } - if (iterate_upper_bound_) { - if (comparator_->CompareWithoutTimestamp( - delta_entry.key, /*a_has_ts=*/false, *iterate_upper_bound_, - /*b_has_ts=*/false) >= 0) { - // out of upper bound -> finished. - return; - } - } if (delta_result == WBWIIteratorImpl::kDeleted && merge_context_.GetNumOperands() == 0) { AdvanceDelta(); diff --git a/utilities/write_batch_with_index/write_batch_with_index_internal.h b/utilities/write_batch_with_index/write_batch_with_index_internal.h index d8bab54ed6f..c4135ad3264 100644 --- a/utilities/write_batch_with_index/write_batch_with_index_internal.h +++ b/utilities/write_batch_with_index/write_batch_with_index_internal.h @@ -34,8 +34,7 @@ class BaseDeltaIterator : public Iterator { public: BaseDeltaIterator(ColumnFamilyHandle* column_family, Iterator* base_iterator, WBWIIteratorImpl* delta_iterator, - const Comparator* comparator, - const ReadOptions* read_options = nullptr); + const Comparator* comparator); ~BaseDeltaIterator() override {} @@ -72,7 +71,6 @@ class BaseDeltaIterator : public Iterator { std::unique_ptr base_iterator_; std::unique_ptr delta_iterator_; const Comparator* comparator_; // not owned - const Slice* iterate_upper_bound_; MergeContext merge_context_; std::string merge_result_; Slice value_; @@ -200,59 +198,107 @@ class WBWIIteratorImpl : public WBWIIterator { WBWIIteratorImpl(uint32_t column_family_id, WriteBatchEntrySkipList* skip_list, const ReadableWriteBatch* write_batch, - WriteBatchEntryComparator* comparator) + WriteBatchEntryComparator* comparator, + const Slice* iterate_lower_bound = nullptr, + const Slice* iterate_upper_bound = nullptr) : column_family_id_(column_family_id), skip_list_iter_(skip_list), write_batch_(write_batch), - comparator_(comparator) {} + comparator_(comparator), + iterate_lower_bound_(iterate_lower_bound), + iterate_upper_bound_(iterate_upper_bound) {} ~WBWIIteratorImpl() override {} bool Valid() const override { - if (!skip_list_iter_.Valid()) { - return false; - } - const WriteBatchIndexEntry* iter_entry = skip_list_iter_.key(); - return (iter_entry != nullptr && - iter_entry->column_family == column_family_id_); + return !out_of_bound_ && ValidRegardlessOfBoundLimit(); } void SeekToFirst() override { - WriteBatchIndexEntry search_entry( - nullptr /* search_key */, column_family_id_, - true /* is_forward_direction */, true /* is_seek_to_first */); - skip_list_iter_.Seek(&search_entry); + if (iterate_lower_bound_ != nullptr) { + WriteBatchIndexEntry search_entry( + iterate_lower_bound_ /* search_key */, column_family_id_, + true /* is_forward_direction */, false /* is_seek_to_first */); + skip_list_iter_.Seek(&search_entry); + } else { + WriteBatchIndexEntry search_entry( + nullptr /* search_key */, column_family_id_, + true /* is_forward_direction */, true /* is_seek_to_first */); + skip_list_iter_.Seek(&search_entry); + } + + if (ValidRegardlessOfBoundLimit()) { + out_of_bound_ = TestOutOfBound(); + } } void SeekToLast() override { - WriteBatchIndexEntry search_entry( - nullptr /* search_key */, column_family_id_ + 1, - true /* is_forward_direction */, true /* is_seek_to_first */); + WriteBatchIndexEntry search_entry = + (iterate_upper_bound_ != nullptr) + ? WriteBatchIndexEntry( + iterate_upper_bound_ /* search_key */, column_family_id_, + true /* is_forward_direction */, false /* is_seek_to_first */) + : WriteBatchIndexEntry( + nullptr /* search_key */, column_family_id_ + 1, + true /* is_forward_direction */, true /* is_seek_to_first */); + skip_list_iter_.Seek(&search_entry); if (!skip_list_iter_.Valid()) { skip_list_iter_.SeekToLast(); } else { skip_list_iter_.Prev(); } + + if (ValidRegardlessOfBoundLimit()) { + out_of_bound_ = TestOutOfBound(); + } } void Seek(const Slice& key) override { + if (BeforeLowerBound(&key)) { // cap to prevent out of bound + SeekToFirst(); + return; + } + WriteBatchIndexEntry search_entry(&key, column_family_id_, true /* is_forward_direction */, false /* is_seek_to_first */); skip_list_iter_.Seek(&search_entry); + + if (ValidRegardlessOfBoundLimit()) { + out_of_bound_ = TestOutOfBound(); + } } void SeekForPrev(const Slice& key) override { + if (AtOrAfterUpperBound(&key)) { // cap to prevent out of bound + SeekToLast(); + return; + } + WriteBatchIndexEntry search_entry(&key, column_family_id_, false /* is_forward_direction */, false /* is_seek_to_first */); skip_list_iter_.SeekForPrev(&search_entry); + + if (ValidRegardlessOfBoundLimit()) { + out_of_bound_ = TestOutOfBound(); + } } - void Next() override { skip_list_iter_.Next(); } + void Next() override { + skip_list_iter_.Next(); + if (ValidRegardlessOfBoundLimit()) { + out_of_bound_ = TestOutOfBound(); + } + } - void Prev() override { skip_list_iter_.Prev(); } + void Prev() override { + skip_list_iter_.Prev(); + if (ValidRegardlessOfBoundLimit()) { + out_of_bound_ = TestOutOfBound(); + } + } WriteEntry Entry() const override; @@ -293,6 +339,45 @@ class WBWIIteratorImpl : public WBWIIterator { WriteBatchEntrySkipList::Iterator skip_list_iter_; const ReadableWriteBatch* write_batch_; WriteBatchEntryComparator* comparator_; + const Slice* iterate_lower_bound_; + const Slice* iterate_upper_bound_; + bool out_of_bound_ = false; + + bool TestOutOfBound() const { + const Slice& curKey = Entry().key; + return AtOrAfterUpperBound(&curKey) || BeforeLowerBound(&curKey); + } + + bool ValidRegardlessOfBoundLimit() const { + if (!skip_list_iter_.Valid()) { + return false; + } + const WriteBatchIndexEntry* iter_entry = skip_list_iter_.key(); + return iter_entry != nullptr && + iter_entry->column_family == column_family_id_; + } + + bool AtOrAfterUpperBound(const Slice* k) const { + if (iterate_upper_bound_ == nullptr) { + return false; + } + + return comparator_->GetComparator(column_family_id_) + ->CompareWithoutTimestamp(*k, /*a_has_ts=*/false, + *iterate_upper_bound_, + /*b_has_ts=*/false) >= 0; + } + + bool BeforeLowerBound(const Slice* k) const { + if (iterate_lower_bound_ == nullptr) { + return false; + } + + return comparator_->GetComparator(column_family_id_) + ->CompareWithoutTimestamp(*k, /*a_has_ts=*/false, + *iterate_lower_bound_, + /*b_has_ts=*/false) < 0; + } }; class WriteBatchWithIndexInternal { diff --git a/utilities/write_batch_with_index/write_batch_with_index_test.cc b/utilities/write_batch_with_index/write_batch_with_index_test.cc index c69dd39a2f0..95333d8f470 100644 --- a/utilities/write_batch_with_index/write_batch_with_index_test.cc +++ b/utilities/write_batch_with_index/write_batch_with_index_test.cc @@ -1640,6 +1640,104 @@ TEST_P(WriteBatchWithIndexTest, TestNewIteratorWithBaseFromWbwi) { ASSERT_OK(iter->status()); } +TEST_P(WriteBatchWithIndexTest, TestBoundsCheckingInDeltaIterator) { + Status s = OpenDB(); + ASSERT_OK(s); + + KVMap empty_map; + + // writes that should be observed by BaseDeltaIterator::delta_iterator_ + ASSERT_OK(batch_->Put("a", "aa")); + ASSERT_OK(batch_->Put("b", "bb")); + ASSERT_OK(batch_->Put("c", "cc")); + + ReadOptions ro; + + auto check_only_b_is_visible = [&]() { + std::unique_ptr iter(batch_->NewIteratorWithBase( + db_->DefaultColumnFamily(), new KVIter(&empty_map), &ro)); + + // move to the lower bound + iter->SeekToFirst(); + ASSERT_EQ("b", iter->key()); + iter->Prev(); + ASSERT_FALSE(iter->Valid()); + + // move to the upper bound + iter->SeekToLast(); + ASSERT_EQ("b", iter->key()); + iter->Next(); + ASSERT_FALSE(iter->Valid()); + + // test bounds checking in Seek and SeekForPrev + iter->Seek(Slice("a")); + ASSERT_EQ("b", iter->key()); + iter->Seek(Slice("b")); + ASSERT_EQ("b", iter->key()); + iter->Seek(Slice("c")); + ASSERT_FALSE(iter->Valid()); + + iter->SeekForPrev(Slice("c")); + ASSERT_EQ("b", iter->key()); + iter->SeekForPrev(Slice("b")); + ASSERT_EQ("b", iter->key()); + iter->SeekForPrev(Slice("a")); + ASSERT_FALSE(iter->Valid()); + + iter->SeekForPrev( + Slice("a.1")); // a non-existent key that is smaller than "b" + ASSERT_FALSE(iter->Valid()); + + iter->Seek(Slice("b.1")); // a non-existent key that is greater than "b" + ASSERT_FALSE(iter->Valid()); + + delete ro.iterate_lower_bound; + delete ro.iterate_upper_bound; + }; + + ro.iterate_lower_bound = new Slice("b"); + ro.iterate_upper_bound = new Slice("c"); + check_only_b_is_visible(); + + ro.iterate_lower_bound = new Slice("a.1"); + ro.iterate_upper_bound = new Slice("c"); + check_only_b_is_visible(); + + ro.iterate_lower_bound = new Slice("b"); + ro.iterate_upper_bound = new Slice("b.2"); + check_only_b_is_visible(); +} + +TEST_P(WriteBatchWithIndexTest, + TestBoundsCheckingInSeekToFirstAndLastOfDeltaIterator) { + Status s = OpenDB(); + ASSERT_OK(s); + KVMap empty_map; + // writes that should be observed by BaseDeltaIterator::delta_iterator_ + ASSERT_OK(batch_->Put("c", "cc")); + + ReadOptions ro; + auto check_nothing_visible = [&]() { + std::unique_ptr iter(batch_->NewIteratorWithBase( + db_->DefaultColumnFamily(), new KVIter(&empty_map), &ro)); + iter->SeekToFirst(); + ASSERT_FALSE(iter->Valid()); + iter->SeekToLast(); + ASSERT_FALSE(iter->Valid()); + + delete ro.iterate_lower_bound; + delete ro.iterate_upper_bound; + }; + + ro.iterate_lower_bound = new Slice("b"); + ro.iterate_upper_bound = new Slice("c"); + check_nothing_visible(); + + ro.iterate_lower_bound = new Slice("d"); + ro.iterate_upper_bound = new Slice("e"); + check_nothing_visible(); +} + TEST_P(WriteBatchWithIndexTest, SavePointTest) { ColumnFamilyHandleImplDummy cf1(1, BytewiseComparator()); KVMap empty_map;