Skip to content

Commit

Permalink
LowerBoundPrefixMap: use folly::tape
Browse files Browse the repository at this point in the history
Summary: Now that we have it, use folly::tape instead of doing this by hand.

Reviewed By: efiks

Differential Revision: D52484555

fbshipit-source-id: 70ec4cc784221d8cfd2087a1b4f9191f89f40f7a
  • Loading branch information
DenisYaroshevskiy authored and facebook-github-bot committed Jan 3, 2024
1 parent 31d546b commit 10eba7a
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 72 deletions.
81 changes: 27 additions & 54 deletions mcrouter/lib/fbi/cpp/LowerBoundPrefixMap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,26 +33,10 @@ std::ostream& operator<<(std::ostream& os, const SmallPrefix& self) {
}

LowerBoundPrefixMapCommon::LowerBoundPrefixMapCommon(
const std::vector<std::string_view>& sortedUniquePrefixes) {
smallPrefixes_.reserve(sortedUniquePrefixes.size() + 1);
markers_.reserve(sortedUniquePrefixes.size() + 1);
previousPrefix_.reserve(sortedUniquePrefixes.size());

// total size
{
std::size_t size = 0;
for (const auto& p : sortedUniquePrefixes) {
size += p.size();
}
if (size >= std::numeric_limits<std::uint32_t>::max()) {
throw std::runtime_error(
"too many chars for LowerBoundPrefixMap: " + std::to_string(size));
}
chars_.resize(size);
}

char* cur = chars_.data();
markers_.push_back(0);
folly::string_tape sortedUniquePrefixes)
: fullPrefixes_(std::move(sortedUniquePrefixes)) {
smallPrefixes_.reserve(fullPrefixes_.size() + 1);
previousPrefix_.reserve(fullPrefixes_.size());

// Adding an empty string with no matches.
// This acts as a sentinel so we are always guaranteed to find an
Expand All @@ -62,56 +46,45 @@ LowerBoundPrefixMapCommon::LowerBoundPrefixMapCommon(
// empty.
smallPrefixes_.emplace(SmallPrefix(), IndexPair{0, 0});

for (const auto& prefix : sortedUniquePrefixes) {
for (std::size_t i = 0; i != fullPrefixes_.size(); ++i) {
const auto& prefix = fullPrefixes_[i];

// Prefixes always come before lexicographically, so if there is one
// we will find it.
previousPrefix_.push_back(findPrefix(prefix));

// Add small prefix
{
std::uint32_t curPos = static_cast<std::uint32_t>(markers_.size() - 1);

// NOTE: this single element emplace is reasonably fast,
// because it's at the end. Unfortunately, if we were
// to use a hint, we'd lose the 'inserted' bool, which
// makes the code clumsier. If it becomes necessary, this
// code can be written with vectors for better performance.
auto [it, inserted] = smallPrefixes_.insert(
{SmallPrefix{prefix}, IndexPair{curPos, curPos + 1}});

if (!inserted) {
// Prefixes match with the last one, so we need to
// update the range.
++(it->second.second);
}
// Add small prefix ---
// if it is there already - update existing, otherwise insert [i, i+1]
//
// NOTE: this single element emplace in a sorted_vector is reasonably fast,
// because it's at the end. Unfortunately, we can't use a hint for API
// reasons.
auto [it, inserted] =
smallPrefixes_.insert({SmallPrefix{prefix}, IndexPair{i, i + 1}});

if (!inserted) {
++(it->second.second);
}

cur = std::copy(prefix.begin(), prefix.end(), cur);
markers_.push_back(static_cast<std::uint32_t>(cur - chars_.data()));
}
chars_.resize(cur - chars_.data());
}

std::uint32_t LowerBoundPrefixMapCommon::findPrefix(
std::string_view query) const noexcept {
// Due to a sentinel - guaranteed to not be .begin()
auto lb = smallPrefixes_.upper_bound(SmallPrefix{query});
auto [roughFrom, roughTo] = std::prev(lb)->second;
auto afterPrefix = smallPrefixes_.upper_bound(SmallPrefix{query});
auto [roughFrom, roughTo] = std::prev(afterPrefix)->second;

// Binary search complete strings between rough boundaries.
// NOTE: which array we search - doesn't matter -
// we just want indexes.
auto cur = std::upper_bound(
markers_.begin() + roughFrom,
markers_.begin() + roughTo,
query,
[&](std::string_view q, const auto& m) {
auto i = static_cast<std::uint32_t>(&m - markers_.data());
return q < str(i);
}) -
markers_.begin();

while (cur != 0 && !std_string_view_starts_with(query, str(cur - 1))) {
fullPrefixes_.begin() + roughFrom,
fullPrefixes_.begin() + roughTo,
query) -
fullPrefixes_.begin();

while (cur != 0 &&
!std_string_view_starts_with(query, fullPrefixes_[cur - 1])) {
cur = previousPrefix_[cur - 1];
}

Expand Down
25 changes: 7 additions & 18 deletions mcrouter/lib/fbi/cpp/LowerBoundPrefixMap.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <folly/CPortability.h>
#include <folly/Range.h>
#include <folly/container/Iterator.h>
#include <folly/container/tape.h>
#include <folly/lang/Bits.h>
#include <folly/sorted_vector_types.h>

Expand Down Expand Up @@ -102,8 +103,7 @@ class SmallPrefix {
//
struct LowerBoundPrefixMapCommon {
LowerBoundPrefixMapCommon() = default;
explicit LowerBoundPrefixMapCommon(
const std::vector<std::string_view>& sortedUniquePrefixes);
explicit LowerBoundPrefixMapCommon(folly::string_tape sortedUniquePrefixes);

// returns 1 based indexes, 0 if not found.
std::uint32_t findPrefix(std::string_view query) const noexcept;
Expand All @@ -116,14 +116,7 @@ struct LowerBoundPrefixMapCommon {
// NOTE: in theory folly::heap_vector_map should be better here
// but benchmarks do not support that idea.
folly::sorted_vector_map<SmallPrefix, IndexPair> smallPrefixes_;

// All strings are stored contiguously in this buffer of chars, separated at
// markers_.
// markers_[0] == 0, markers_.back() == chars.size().
// This is sometimes known as a "tape"
// This is faster in the benchmarks and more cache local.
std::vector<char> chars_;
std::vector<std::uint32_t> markers_;
folly::string_tape fullPrefixes_; // sorted and unique

// Each string might have a prefix also in the array.
// a b ba baa bab
Expand All @@ -132,11 +125,6 @@ struct LowerBoundPrefixMapCommon {
// ^________|
// This is the index of that prefix, base 1 (0 means absence).
std::vector<std::uint32_t> previousPrefix_;

// Returns a view of the i-th stored string: it starts at offset markers_[i]
// inside chars_ and its length is markers_[i + 1] - markers_[i].
// No bounds checking is performed here; i + 1 must be a valid index into
// markers_. The returned view points into chars_ and does not own the data.
std::string_view str(std::uint32_t i) const {
const char* f = chars_.data() + markers_[i];
return std::string_view{f, markers_[i + 1] - markers_[i]};
}
};

template <typename Storage>
Expand All @@ -148,7 +136,7 @@ class LowerBoundPrefixMapReference {
: storage_(storage), idx_(idx) {}

[[nodiscard]] std::string_view key() const {
return storage_->searchLogic_.str(idx_);
return storage_->searchLogic_.fullPrefixes_[idx_];
}

[[nodiscard]] auto& value() const {
Expand Down Expand Up @@ -330,7 +318,7 @@ LowerBoundPrefixMap<T>::LowerBoundPrefixMap(

folly::Range sortedUnique{rend.base(), prefix2value.end()};

std::vector<std::string_view> sortedPrefixes;
folly::string_tape sortedPrefixes;
sortedPrefixes.reserve(sortedUnique.size());
storage_.values_.reserve(sortedUnique.size());

Expand All @@ -339,7 +327,8 @@ LowerBoundPrefixMap<T>::LowerBoundPrefixMap(
storage_.values_.emplace_back(std::move(value));
}

storage_.searchLogic_ = detail::LowerBoundPrefixMapCommon(sortedPrefixes);
storage_.searchLogic_ =
detail::LowerBoundPrefixMapCommon(std::move(sortedPrefixes));
}

} // namespace facebook::memcache

0 comments on commit 10eba7a

Please sign in to comment.