Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize the prefix performance of RocksDB iterator after delete range #5525

Merged
merged 7 commits into from
May 18, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 16 additions & 4 deletions src/kvstore/RocksEngine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,10 @@ nebula::cpp2::ErrorCode RocksEngine::range(const std::string& start,
const std::string& end,
std::unique_ptr<KVIterator>* storageIter) {
memory::MemoryCheckOffGuard guard;
storageIter->reset(new RocksRangeIter(start, end));
rocksdb::ReadOptions options;
options.iterate_upper_bound =
dynamic_cast<RocksRangeIter*>(storageIter->get())->iterateUpperBound();
if (!isPlainTable_) {
options.total_order_seek = FLAGS_enable_rocksdb_prefix_filtering;
} else {
Expand All @@ -196,8 +199,8 @@ nebula::cpp2::ErrorCode RocksEngine::range(const std::string& start,
std::unique_ptr<rocksdb::Iterator> iter(db_->NewIterator(options));
if (iter) {
iter->Seek(rocksdb::Slice(start));
dynamic_cast<RocksRangeIter*>(storageIter->get())->setIterator(std::move(iter));
}
storageIter->reset(new RocksRangeIter(std::move(iter), start, end));
return nebula::cpp2::ErrorCode::SUCCEEDED;
}

Expand All @@ -218,23 +221,29 @@ nebula::cpp2::ErrorCode RocksEngine::prefixWithExtractor(const std::string& pref
const void* snapshot,
std::unique_ptr<KVIterator>* storageIter) {
memory::MemoryCheckOffGuard guard;
storageIter->reset(new RocksPrefixIter(prefix));
rocksdb::ReadOptions options;
options.iterate_upper_bound =
dynamic_cast<RocksPrefixIter*>(storageIter->get())->iterateUpperBound();
if (UNLIKELY(snapshot != nullptr)) {
options.snapshot = reinterpret_cast<const rocksdb::Snapshot*>(snapshot);
}
options.prefix_same_as_start = true;
std::unique_ptr<rocksdb::Iterator> iter(db_->NewIterator(options));
if (iter) {
iter->Seek(rocksdb::Slice(prefix));
dynamic_cast<RocksPrefixIter*>(storageIter->get())->setIterator(std::move(iter));
}
storageIter->reset(new RocksPrefixIter(std::move(iter), prefix));
return nebula::cpp2::ErrorCode::SUCCEEDED;
}

nebula::cpp2::ErrorCode RocksEngine::prefixWithoutExtractor(
const std::string& prefix, const void* snapshot, std::unique_ptr<KVIterator>* storageIter) {
memory::MemoryCheckOffGuard guard;
storageIter->reset(new RocksPrefixIter(prefix));
rocksdb::ReadOptions options;
options.iterate_upper_bound =
dynamic_cast<RocksPrefixIter*>(storageIter->get())->iterateUpperBound();
if (snapshot != nullptr) {
options.snapshot = reinterpret_cast<const rocksdb::Snapshot*>(snapshot);
}
Expand All @@ -243,16 +252,19 @@ nebula::cpp2::ErrorCode RocksEngine::prefixWithoutExtractor(
std::unique_ptr<rocksdb::Iterator> iter(db_->NewIterator(options));
if (iter) {
iter->Seek(rocksdb::Slice(prefix));
dynamic_cast<RocksPrefixIter*>(storageIter->get())->setIterator(std::move(iter));
}
storageIter->reset(new RocksPrefixIter(std::move(iter), prefix));
return nebula::cpp2::ErrorCode::SUCCEEDED;
}

nebula::cpp2::ErrorCode RocksEngine::rangeWithPrefix(const std::string& start,
const std::string& prefix,
std::unique_ptr<KVIterator>* storageIter) {
memory::MemoryCheckOffGuard guard;
storageIter->reset(new RocksPrefixIter(prefix));
rocksdb::ReadOptions options;
options.iterate_upper_bound =
dynamic_cast<RocksPrefixIter*>(storageIter->get())->iterateUpperBound();
if (!isPlainTable_) {
options.total_order_seek = FLAGS_enable_rocksdb_prefix_filtering;
} else {
Expand All @@ -261,8 +273,8 @@ nebula::cpp2::ErrorCode RocksEngine::rangeWithPrefix(const std::string& start,
std::unique_ptr<rocksdb::Iterator> iter(db_->NewIterator(options));
if (iter) {
iter->Seek(rocksdb::Slice(start));
dynamic_cast<RocksPrefixIter*>(storageIter->get())->setIterator(std::move(iter));
}
storageIter->reset(new RocksPrefixIter(std::move(iter), prefix));
return nebula::cpp2::ErrorCode::SUCCEEDED;
}

Expand Down
46 changes: 44 additions & 2 deletions src/kvstore/RocksEngine.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <memory>

#include "common/base/Base.h"
#include "common/utils/NebulaKeyUtils.h"
#include "kvstore/KVEngine.h"
#include "kvstore/KVIterator.h"
#include "kvstore/RocksEngineConfig.h"
Expand All @@ -32,6 +33,10 @@ class RocksRangeIter : public KVIterator {
RocksRangeIter(std::unique_ptr<rocksdb::Iterator> iter, rocksdb::Slice start, rocksdb::Slice end)
: iter_(std::move(iter)), start_(start), end_(end) {}

RocksRangeIter(rocksdb::Slice start, rocksdb::Slice end) : start_(start), end_(end) {
iterateUpperBound_.reset(new rocksdb::Slice(end.data(), end.size()));
}

~RocksRangeIter() = default;

bool valid() const override {
Expand All @@ -54,10 +59,25 @@ class RocksRangeIter : public KVIterator {
return folly::StringPiece(iter_->value().data(), iter_->value().size());
}

rocksdb::Slice* iterateUpperBound() const {
luyade marked this conversation as resolved.
Show resolved Hide resolved
return iterateUpperBound_.get();
}

void setIterator(rocksdb::Iterator* iter) {
luyade marked this conversation as resolved.
Show resolved Hide resolved
iter_.reset(iter);
}

void setIterator(std::unique_ptr<rocksdb::Iterator> iter) {
iter_ = std::move(iter);
}

private:
std::unique_ptr<rocksdb::Iterator> iter_;
std::unique_ptr<rocksdb::Iterator> iter_{nullptr};
rocksdb::Slice start_;
rocksdb::Slice end_;
// Make sure that the lifetime of iterate_upper_bound in rocksdb::ReadOptions
// is the same as RocksRangeIter object
std::unique_ptr<rocksdb::Slice> iterateUpperBound_;
luyade marked this conversation as resolved.
Show resolved Hide resolved
};

/**
Expand All @@ -70,6 +90,11 @@ class RocksPrefixIter : public KVIterator {
RocksPrefixIter(std::unique_ptr<rocksdb::Iterator> iter, rocksdb::Slice prefix)
: iter_(std::move(iter)), prefix_(prefix) {}

explicit RocksPrefixIter(rocksdb::Slice prefix) : prefix_(prefix) {
iterateUpperBoundKey_ = NebulaKeyUtils::lastKey(prefix_.ToString(), 128);
luyade marked this conversation as resolved.
Show resolved Hide resolved
iterateUpperBound_.reset(new rocksdb::Slice(iterateUpperBoundKey_));
}

~RocksPrefixIter() = default;

bool valid() const override {
Expand All @@ -92,9 +117,26 @@ class RocksPrefixIter : public KVIterator {
return folly::StringPiece(iter_->value().data(), iter_->value().size());
}

rocksdb::Slice* iterateUpperBound() const {
luyade marked this conversation as resolved.
Show resolved Hide resolved
return iterateUpperBound_.get();
}

void setIterator(rocksdb::Iterator* iter) {
luyade marked this conversation as resolved.
Show resolved Hide resolved
iter_.reset(iter);
}

void setIterator(std::unique_ptr<rocksdb::Iterator> iter) {
iter_ = std::move(iter);
}

protected:
std::unique_ptr<rocksdb::Iterator> iter_;
std::unique_ptr<rocksdb::Iterator> iter_{nullptr};
rocksdb::Slice prefix_;
// temporary key to make sure data_ of iterateUpperBound_ valid during its lifetime
std::string iterateUpperBoundKey_;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since it's temporary, I prefer keeping its lifetime temp as well, that is, not setting as protected/private variable.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

iterateUpperBoundKey_ is not really temporary. This string should be valid during the whole lifetime of this iterator. Otherwise, if the string got destroyed before iterator, the iterate_upper_bound of rocksdb::ReadOptions will become invalid, which will then result in crash.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

then the field iterateUpperBound_ is no needed, you could construct rocksdb::Slice from upperBoundKey_ every time when needed. Since rocksdb::Slice is an string view, and really lightweight.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated. We passed a pointer to ReadOptions and we should make sure the pointer valid before the iterator is destructed, so the Slice also should not be a local temporary variable.

// Make sure that the lifetime of iterate_upper_bound in rocksdb::ReadOptions
// is the same as RocksPrefixIter object
std::unique_ptr<rocksdb::Slice> iterateUpperBound_;
};

/**
Expand Down