Skip to content

Commit 8dfcfd4

Browse files
jowlyzhangfacebook-github-bot
authored andcommitted
Fix backward iteration issue when user defined timestamp is enabled in BlobDB (#11258)
Summary: During backward iteration, blob verification would fail because the user key (ts included) in `saved_key_` doesn't match the blob. This happens because during`FindValueForCurrentKey`, `saved_key_` is not updated when the user key(ts not included) is the same for all cases except when `timestamp_lb_` is specified. This breaks the blob verification logic when user defined timestamp is enabled and `timestamp_lb_` is not specified. Fix this by always updating `saved_key_` when a smaller user key (ts included) is seen. Pull Request resolved: #11258 Test Plan: `make check` `./db_blob_basic_test --gtest_filter=DBBlobWithTimestampTest.IterateBlobs` Run db_bench (built with DEBUG_LEVEL=0) to demonstrate that no overhead is introduced with: `./db_bench -user_timestamp_size=8 -db=/dev/shm/rocksdb -disable_wal=1 -benchmarks=fillseq,seekrandom[-W1-X6] -reverse_iterator=1 -seek_nexts=5` Baseline: - seekrandom [AVG 6 runs] : 72188 (± 1481) ops/sec; 37.2 (± 0.8) MB/sec With this PR: - seekrandom [AVG 6 runs] : 74171 (± 1427) ops/sec; 38.2 (± 0.7) MB/sec Reviewed By: ltamasi Differential Revision: D43675642 Pulled By: jowlyzhang fbshipit-source-id: 8022ae8522d1f66548821855e6eed63640c14e04
1 parent cf09917 commit 8dfcfd4

File tree

4 files changed

+106
-24
lines changed

4 files changed

+106
-24
lines changed

HISTORY.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
* Compaction output file cutting logic now considers range tombstone start keys. For example, SST partitioner now may receive ParitionRequest for range tombstone start keys.
55

66
### Bug Fixes
7-
* Fixed an issue for backward iteration when `ReadOptions::iter_start_ts` is specified in combination with BlobDB.
7+
* Fixed an issue for backward iteration when user defined timestamp is enabled in combination with BlobDB.
88

99
### New Features
1010
* Add statistics rocksdb.secondary.cache.filter.hits, rocksdb.secondary.cache.index.hits, and rocksdb.secondary.cache.filter.hits

db/blob/db_blob_basic_test.cc

+99-11
Original file line numberDiff line numberDiff line change
@@ -2044,27 +2044,115 @@ TEST_F(DBBlobWithTimestampTest, IterateBlobs) {
20442044

20452045
ReadOptions read_options;
20462046
std::vector<std::string> read_timestamps = {Timestamp(0, 0), Timestamp(3, 0)};
2047-
Slice ts_lower_bound(read_timestamps[0]);
20482047
Slice ts_upper_bound(read_timestamps[1]);
2049-
read_options.iter_start_ts = &ts_lower_bound;
20502048
read_options.timestamp = &ts_upper_bound;
20512049

20522050
auto check_iter_entry =
20532051
[](const Iterator* iter, const std::string& expected_key,
2054-
const std::string& expected_ts, const std::string& expected_value) {
2052+
const std::string& expected_ts, const std::string& expected_value,
2053+
bool key_is_internal = true) {
20552054
ASSERT_OK(iter->status());
2056-
std::string expected_ukey_and_ts;
2057-
expected_ukey_and_ts.assign(expected_key.data(), expected_key.size());
2058-
expected_ukey_and_ts.append(expected_ts.data(), expected_ts.size());
2059-
2060-
ParsedInternalKey parsed_ikey;
2061-
ASSERT_OK(ParseInternalKey(iter->key(), &parsed_ikey,
2062-
true /* log_err_key */));
2063-
ASSERT_EQ(parsed_ikey.user_key, expected_ukey_and_ts);
2055+
if (key_is_internal) {
2056+
std::string expected_ukey_and_ts;
2057+
expected_ukey_and_ts.assign(expected_key.data(), expected_key.size());
2058+
expected_ukey_and_ts.append(expected_ts.data(), expected_ts.size());
2059+
2060+
ParsedInternalKey parsed_ikey;
2061+
ASSERT_OK(ParseInternalKey(iter->key(), &parsed_ikey,
2062+
true /* log_err_key */));
2063+
ASSERT_EQ(parsed_ikey.user_key, expected_ukey_and_ts);
2064+
} else {
2065+
ASSERT_EQ(iter->key(), expected_key);
2066+
}
20642067
ASSERT_EQ(iter->timestamp(), expected_ts);
20652068
ASSERT_EQ(iter->value(), expected_value);
20662069
};
20672070

2071+
// Forward iterating one version of each key, get in this order:
2072+
// [("key0", Timestamp(2, 0), "blob01"),
2073+
// ("key1", Timestamp(2, 0), "blob11")...]
2074+
{
2075+
std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
2076+
ASSERT_OK(iter->status());
2077+
2078+
iter->SeekToFirst();
2079+
for (int i = 0; i < num_blobs; i++) {
2080+
check_iter_entry(iter.get(), keys[i], write_timestamps[1],
2081+
blobs[i] + std::to_string(1), /*key_is_internal*/ false);
2082+
iter->Next();
2083+
}
2084+
}
2085+
2086+
// Forward iteration, then reverse to backward.
2087+
{
2088+
std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
2089+
ASSERT_OK(iter->status());
2090+
2091+
iter->SeekToFirst();
2092+
for (int i = 0; i < num_blobs * 2 - 1; i++) {
2093+
if (i < num_blobs) {
2094+
check_iter_entry(iter.get(), keys[i], write_timestamps[1],
2095+
blobs[i] + std::to_string(1),
2096+
/*key_is_internal*/ false);
2097+
if (i != num_blobs - 1) {
2098+
iter->Next();
2099+
}
2100+
} else {
2101+
if (i != num_blobs) {
2102+
check_iter_entry(iter.get(), keys[num_blobs * 2 - 1 - i],
2103+
write_timestamps[1],
2104+
blobs[num_blobs * 2 - 1 - i] + std::to_string(1),
2105+
/*key_is_internal*/ false);
2106+
}
2107+
iter->Prev();
2108+
}
2109+
}
2110+
}
2111+
2112+
// Backward iterating one versions of each key, get in this order:
2113+
// [("key4", Timestamp(2, 0), "blob41"),
2114+
// ("key3", Timestamp(2, 0), "blob31")...]
2115+
{
2116+
std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
2117+
ASSERT_OK(iter->status());
2118+
2119+
iter->SeekToLast();
2120+
for (int i = 0; i < num_blobs; i++) {
2121+
check_iter_entry(iter.get(), keys[num_blobs - 1 - i], write_timestamps[1],
2122+
blobs[num_blobs - 1 - i] + std::to_string(1),
2123+
/*key_is_internal*/ false);
2124+
iter->Prev();
2125+
}
2126+
}
2127+
2128+
// Backward iteration, then reverse to forward.
2129+
{
2130+
std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
2131+
ASSERT_OK(iter->status());
2132+
2133+
iter->SeekToLast();
2134+
for (int i = 0; i < num_blobs * 2 - 1; i++) {
2135+
if (i < num_blobs) {
2136+
check_iter_entry(iter.get(), keys[num_blobs - 1 - i],
2137+
write_timestamps[1],
2138+
blobs[num_blobs - 1 - i] + std::to_string(1),
2139+
/*key_is_internal*/ false);
2140+
if (i != num_blobs - 1) {
2141+
iter->Prev();
2142+
}
2143+
} else {
2144+
if (i != num_blobs) {
2145+
check_iter_entry(iter.get(), keys[i - num_blobs], write_timestamps[1],
2146+
blobs[i - num_blobs] + std::to_string(1),
2147+
/*key_is_internal*/ false);
2148+
}
2149+
iter->Next();
2150+
}
2151+
}
2152+
}
2153+
2154+
Slice ts_lower_bound(read_timestamps[0]);
2155+
read_options.iter_start_ts = &ts_lower_bound;
20682156
// Forward iterating multiple versions of the same key, get in this order:
20692157
// [("key0", Timestamp(2, 0), "blob01"),
20702158
// ("key0", Timestamp(1, 0), "blob00"),

db/db_iter.cc

+6-9
Original file line numberDiff line numberDiff line change
@@ -877,9 +877,14 @@ bool DBIter::FindValueForCurrentKey() {
877877
if (timestamp_lb_ != nullptr) {
878878
// Only needed when timestamp_lb_ is not null
879879
[[maybe_unused]] const bool ret = ParseKey(&ikey_);
880-
saved_ikey_.assign(iter_.key().data(), iter_.key().size());
881880
// Since the preceding ParseKey(&ikey) succeeds, so must this.
882881
assert(ret);
882+
saved_key_.SetInternalKey(ikey);
883+
} else if (user_comparator_.Compare(ikey.user_key,
884+
saved_key_.GetUserKey()) < 0) {
885+
saved_key_.SetUserKey(
886+
ikey.user_key,
887+
!pin_thru_lifetime_ || !iter_.iter()->IsKeyPinned() /* copy */);
883888
}
884889

885890
valid_entry_seen = true;
@@ -964,7 +969,6 @@ bool DBIter::FindValueForCurrentKey() {
964969
if (timestamp_lb_ == nullptr) {
965970
valid_ = false;
966971
} else {
967-
saved_key_.SetInternalKey(saved_ikey_);
968972
valid_ = true;
969973
}
970974
return true;
@@ -1010,17 +1014,10 @@ bool DBIter::FindValueForCurrentKey() {
10101014
}
10111015
break;
10121016
case kTypeValue:
1013-
if (timestamp_lb_ != nullptr) {
1014-
saved_key_.SetInternalKey(saved_ikey_);
1015-
}
1016-
10171017
SetValueAndColumnsFromPlain(pinned_value_);
10181018

10191019
break;
10201020
case kTypeBlobIndex:
1021-
if (timestamp_lb_ != nullptr) {
1022-
saved_key_.SetInternalKey(saved_ikey_);
1023-
}
10241021
if (!SetBlobValueIfNeeded(saved_key_.GetUserKey(), pinned_value_)) {
10251022
return false;
10261023
}

db/db_iter.h

-3
Original file line numberDiff line numberDiff line change
@@ -394,9 +394,6 @@ class DBIter final : public Iterator {
394394
const Slice* const timestamp_lb_;
395395
const size_t timestamp_size_;
396396
std::string saved_timestamp_;
397-
398-
// Used only if timestamp_lb_ is not nullptr.
399-
std::string saved_ikey_;
400397
};
401398

402399
// Return a new iterator that converts internal keys (yielded by

0 commit comments

Comments
 (0)