From 34a397b9249545569caf1860595982bf6eb05c65 Mon Sep 17 00:00:00 2001
From: Protobuf Team Bot
Date: Mon, 23 Dec 2024 07:29:07 -0800
Subject: [PATCH] Improve copy of map fields.

On copy construct/assign we can:
- guarantee there is enough space for all elements. This avoids rehashing.
- guarantee all elements are unique. This avoids lookups.

PiperOrigin-RevId: 709057139
---
 src/google/protobuf/map.h        | 130 ++++++++++++++++++++++++-------
 src/google/protobuf/map_field.h  |   8 +-
 src/google/protobuf/map_test.cc  |  18 +++++
 src/google/protobuf/map_test.inc |   9 ++-
 4 files changed, 134 insertions(+), 31 deletions(-)

diff --git a/src/google/protobuf/map.h b/src/google/protobuf/map.h
index 739d17283e53..7817c85a15ed 100644
--- a/src/google/protobuf/map.h
+++ b/src/google/protobuf/map.h
@@ -27,19 +27,19 @@
 #include <type_traits>
 #include <utility>
-#include "absl/base/optimization.h"
-#include "absl/memory/memory.h"
-#include "google/protobuf/message_lite.h"
-
 #include "absl/base/attributes.h"
+#include "absl/base/optimization.h"
+#include "absl/base/prefetch.h"
 #include "absl/container/btree_map.h"
 #include "absl/hash/hash.h"
 #include "absl/log/absl_check.h"
 #include "absl/meta/type_traits.h"
+#include "absl/numeric/bits.h"
 #include "absl/strings/string_view.h"
 #include "google/protobuf/arena.h"
 #include "google/protobuf/generated_enum_util.h"
 #include "google/protobuf/internal_visibility.h"
+#include "google/protobuf/message_lite.h"
 #include "google/protobuf/port.h"
 #include "google/protobuf/wire_format_lite.h"
@@ -365,6 +365,7 @@ class PROTOBUF_EXPORT UntypedMapBase {
  protected:
   // 16 bytes is the minimum useful size for the array cache in the arena.
   static constexpr map_index_t kMinTableSize = 16 / sizeof(void*);
+  static constexpr map_index_t kMaxTableSize = map_index_t{1} << 31;
 
  public:
   Arena* arena() const { return arena_; }
@@ -792,6 +793,25 @@ class KeyMapBase : public UntypedMapBase {
     return num_buckets - num_buckets / 16 * 4 - num_buckets % 2;
   }
 
+  // For a particular size, calculate the lowest capacity `cap` where
+  // `size <= CalculateHiCutoff(cap)`.
+  static size_type CalculateCapacityForSize(size_type size) {
+    ABSL_DCHECK_NE(size, 0u);
+
+    if (size > kMaxTableSize / 2) {
+      return kMaxTableSize;
+    }
+
+    size_t capacity = size_type{1} << (std::numeric_limits<size_type>::digits -
+                                       absl::countl_zero(size - 1));
+
+    if (size > CalculateHiCutoff(capacity)) {
+      capacity *= 2;
+    }
+
+    return std::max<size_type>(capacity, kMinTableSize);
+  }
+
   void AssertLoadFactor() const {
     ABSL_DCHECK_LE(num_elements_, CalculateHiCutoff(num_buckets_));
   }
@@ -812,7 +832,9 @@ class KeyMapBase : public UntypedMapBase {
     // practice, this seems fine.
     if (ABSL_PREDICT_FALSE(new_size > hi_cutoff)) {
       if (num_buckets_ <= max_size() / 2) {
-        Resize(num_buckets_ * 2);
+        Resize(kMinTableSize > kGlobalEmptyTableSize * 2
+                   ? std::max(kMinTableSize, num_buckets_ * 2)
+                   : num_buckets_ * 2);
         return true;
       }
     } else if (ABSL_PREDICT_FALSE(new_size <= lo_cutoff &&
@@ -836,13 +858,34 @@ class KeyMapBase : public UntypedMapBase {
     return false;
   }
 
+  // Interpret `head` as a linked list and insert all the nodes into `this`.
+  // REQUIRES: this->empty()
+  // REQUIRES: the input nodes have unique keys
+  PROTOBUF_NOINLINE void MergeIntoEmpty(NodeBase* head, size_t num_nodes) {
+    ABSL_DCHECK_EQ(size(), size_t{0});
+    ABSL_DCHECK_NE(num_nodes, size_t{0});
+    if (const map_index_t needed_capacity = CalculateCapacityForSize(num_nodes);
+        needed_capacity != this->num_buckets_) {
+      Resize(std::max(kMinTableSize, needed_capacity));
+    }
+    num_elements_ = num_nodes;
+    AssertLoadFactor();
+    while (head != nullptr) {
+      KeyNode* node = static_cast<KeyNode*>(head);
+      head = head->next;
+      absl::PrefetchToLocalCacheNta(head);
+      InsertUnique(BucketNumber(TS::ToView(node->key())), node);
+    }
+  }
+
   // Resize to the given number of buckets.
   void Resize(map_index_t new_num_buckets) {
+    ABSL_DCHECK_GE(new_num_buckets, kMinTableSize);
+    ABSL_DCHECK(absl::has_single_bit(new_num_buckets));
     if (num_buckets_ == kGlobalEmptyTableSize) {
       // This is the global empty array.
       // Just overwrite with a new one. No need to transfer or free anything.
-      ABSL_DCHECK_GE(kMinTableSize, new_num_buckets);
-      num_buckets_ = index_of_first_non_null_ = kMinTableSize;
+      num_buckets_ = index_of_first_non_null_ = new_num_buckets;
       table_ = CreateEmptyTable(num_buckets_);
       return;
     }
@@ -997,7 +1040,7 @@ class Map : private internal::KeyMapBase<internal::KeyForBase<Key>> {
  private:
   Map(Arena* arena, const Map& other) : Map(arena) {
     StaticValidityCheck();
-    insert(other.begin(), other.end());
+    CopyFromImpl(other);
   }
 
   static_assert(!std::is_const<Key>::value && !std::is_const<T>::value,
@@ -1343,7 +1386,7 @@ class Map : private internal::KeyMapBase<internal::KeyForBase<Key>> {
   Map& operator=(const Map& other) ABSL_ATTRIBUTE_LIFETIME_BOUND {
     if (this != &other) {
       clear();
-      insert(other.begin(), other.end());
+      CopyFromImpl(other);
     }
     return *this;
   }
@@ -1352,12 +1395,13 @@ class Map : private internal::KeyMapBase<internal::KeyForBase<Key>> {
     if (arena() == other.arena()) {
       InternalSwap(&other);
     } else {
-      // TODO: optimize this. The temporary copy can be allocated
-      // in the same arena as the other message, and the "other = copy" can
-      // be replaced with the fast-path swap above.
-      Map copy = *this;
-      *this = other;
-      other = copy;
+      size_t other_size = other.size();
+      Node* other_copy = this->CloneFromOther(other);
+      other = *this;
+      this->clear();
+      if (other_size != 0) {
+        this->MergeIntoEmpty(other_copy, other_size);
+      }
     }
   }
 
@@ -1406,18 +1450,7 @@ class Map : private internal::KeyMapBase<internal::KeyForBase<Key>> {
   }
   template <typename K, typename... Args>
-  std::pair<iterator, bool> TryEmplaceInternal(K&& k, Args&&... args) {
-    auto p = this->FindHelper(TS::ToView(k));
-    internal::map_index_t b = p.bucket;
-    // Case 1: key was already present.
-    if (p.node != nullptr)
-      return std::make_pair(iterator(internal::UntypedMapIterator{
-                                static_cast<Node*>(p.node), this, p.bucket}),
-                            false);
-    // Case 2: insert.
-    if (this->ResizeIfLoadIsOutOfRange(this->num_elements_ + 1)) {
-      b = this->BucketNumber(TS::ToView(k));
-    }
+  PROTOBUF_ALWAYS_INLINE Node* CreateNode(K&& k, Args&&... args) {
     // If K is not key_type, make the conversion to key_type explicit.
     using TypeToInit = typename std::conditional<
         std::is_same<typename std::decay<K>::type, key_type>::value, K&&,
         key_type>::type;
@@ -1437,7 +1470,50 @@ class Map : private internal::KeyMapBase<internal::KeyForBase<Key>> {
     // Note: if `T` is arena constructible, `Args` needs to be empty.
     Arena::CreateInArenaStorage(&node->kv.second, this->arena_,
                                 std::forward<Args>(args)...);
+    return node;
+  }
+
+  // Copy all elements from `other`, using the arena from `this`.
+  // Return them as a linked list, using the `next` pointer in the node.
+  PROTOBUF_NOINLINE Node* CloneFromOther(const Map& other) {
+    Node* head = nullptr;
+    for (const auto& [key, value] : other) {
+      Node* new_node;
+      if constexpr (std::is_base_of_v<MessageLite, mapped_type>) {
+        new_node = CreateNode(key);
+        new_node->kv.second = value;
+      } else {
+        new_node = CreateNode(key, value);
+      }
+      new_node->next = head;
+      head = new_node;
+    }
+    return head;
+  }
+  void CopyFromImpl(const Map& other) {
+    if (other.empty()) return;
+    // We split the logic in two: first we clone the data which requires
+    // Key/Value types, then we insert them all which only requires Key.
+    // That way we reduce code duplication.
+    this->MergeIntoEmpty(CloneFromOther(other), other.size());
+  }
+
+  template <typename K, typename... Args>
+  std::pair<iterator, bool> TryEmplaceInternal(K&& k, Args&&... args) {
+    auto p = this->FindHelper(TS::ToView(k));
+    internal::map_index_t b = p.bucket;
+    // Case 1: key was already present.
+    if (p.node != nullptr) {
+      return std::make_pair(iterator(internal::UntypedMapIterator{
+                                static_cast<Node*>(p.node), this, p.bucket}),
+                            false);
+    }
+    // Case 2: insert.
+    if (this->ResizeIfLoadIsOutOfRange(this->num_elements_ + 1)) {
+      b = this->BucketNumber(TS::ToView(k));
+    }
+    auto* node = CreateNode(std::forward<K>(k), std::forward<Args>(args)...);
     this->InsertUnique(b, node);
     ++this->num_elements_;
     return std::make_pair(iterator(internal::UntypedMapIterator{node, this, b}),
diff --git a/src/google/protobuf/map_field.h b/src/google/protobuf/map_field.h
index 5b1874ee4d4a..210c09c5c0f3 100644
--- a/src/google/protobuf/map_field.h
+++ b/src/google/protobuf/map_field.h
@@ -582,6 +582,10 @@ class TypeDefinedMapFieldBase : public MapFieldBase {
   TypeDefinedMapFieldBase(const VTable* vtable, Arena* arena)
       : MapFieldBase(vtable, arena), map_(arena) {}
 
+  TypeDefinedMapFieldBase(const VTable* vtable, Arena* arena,
+                          const TypeDefinedMapFieldBase& from)
+      : MapFieldBase(vtable, arena), map_(arena, from.GetMap()) {}
+
  protected:
   ~TypeDefinedMapFieldBase() { map_.~Map(); }
@@ -657,9 +661,7 @@ class MapField final : public TypeDefinedMapFieldBase {
   MapField(ArenaInitialized, Arena* arena) : MapField(arena) {}
   MapField(InternalVisibility, Arena* arena) : MapField(arena) {}
   MapField(InternalVisibility, Arena* arena, const MapField& from)
-      : MapField(arena) {
-    this->MergeFromImpl(*this, from);
-  }
+      : TypeDefinedMapFieldBase(&kVTable, arena, from) {}
 
  private:
   typedef void InternalArenaConstructable_;
diff --git a/src/google/protobuf/map_test.cc b/src/google/protobuf/map_test.cc
index 923c37acd174..3aa57c3cccd4 100644
--- a/src/google/protobuf/map_test.cc
+++ b/src/google/protobuf/map_test.cc
@@ -186,6 +186,24 @@ TEST(MapTest, CopyConstructionMaintainsProperLoadFactor) {
   }
 }
 
+TEST(MapTest, CalculateCapacityForSizeTest) {
+  for (size_t size = 1; size < 1000; ++size) {
+    size_t capacity = MapTestPeer::CalculateCapacityForSize(size);
+    // Verify it is large enough for `size`.
+    EXPECT_LE(size, MapTestPeer::CalculateHiCutoff(capacity));
+    if (capacity > MapTestPeer::kMinTableSize) {
+      // Verify it's the smallest capacity that's large enough.
+      EXPECT_GT(size, MapTestPeer::CalculateHiCutoff(capacity / 2));
+    }
+  }
+
+  // Verify very large size does not overflow bucket calculation.
+  for (size_t size : {0x30000001u, 0x40000000u, 0x50000000u, 0x60000000u,
+                      0x70000000u, 0x80000000u, 0x90000000u, 0xFFFFFFFFu}) {
+    EXPECT_EQ(0x80000000u, MapTestPeer::CalculateCapacityForSize(size));
+  }
+}
+
 TEST(MapTest, AlwaysSerializesBothEntries) {
   for (const Message* prototype :
        {static_cast<const Message*>(
diff --git a/src/google/protobuf/map_test.inc b/src/google/protobuf/map_test.inc
index 6841f78e1580..883980f40133 100644
--- a/src/google/protobuf/map_test.inc
+++ b/src/google/protobuf/map_test.inc
@@ -15,6 +15,7 @@
 #endif  // _WIN32
 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -201,14 +202,20 @@ struct MapTestPeer {
     map.Resize(num_buckets);
   }
 
-  static int CalculateHiCutoff(int num_buckets) {
+  static size_t CalculateHiCutoff(size_t num_buckets) {
     return Map::CalculateHiCutoff(num_buckets);
   }
 
+  static size_t CalculateCapacityForSize(size_t size) {
+    return Map::CalculateCapacityForSize(size);
+  }
+
   template <typename Map>
   static auto GetTypeInfo() {
     return Map::GetTypeInfo();
   }
+
+  static constexpr size_t kMinTableSize = UntypedMapBase::kMinTableSize;
 };
 
 namespace {
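
Note on the sizing logic (editorial illustration, not part of the patch): CalculateCapacityForSize rounds the element count up to the next power of two and, if that table's load-factor cutoff is still below the count, doubles once more, clamping the result to [kMinTableSize, kMaxTableSize]. The standalone sketch below mirrors that arithmetic, using std::bit_ceil in place of the patch's countl_zero expression; the constants and the cutoff formula are copied from map.h, everything else is illustrative.

#include <algorithm>
#include <bit>
#include <cassert>
#include <cstddef>
#include <cstdio>

// Standalone mirror of the patch's sizing logic, for illustration only;
// the real helpers are private members of protobuf's internal KeyMapBase.
constexpr size_t kMinTableSize = 16 / sizeof(void*);  // as in map.h
constexpr size_t kMaxTableSize = size_t{1} << 31;     // as in the patch

// Highest element count a table with `num_buckets` buckets may hold
// (formula copied from CalculateHiCutoff in map.h).
size_t CalculateHiCutoff(size_t num_buckets) {
  return num_buckets - num_buckets / 16 * 4 - num_buckets % 2;
}

// Smallest power-of-two capacity whose cutoff is >= `size`.
size_t CalculateCapacityForSize(size_t size) {
  assert(size != 0);
  if (size > kMaxTableSize / 2) return kMaxTableSize;
  // Round up to the next power of two...
  size_t capacity = std::bit_ceil(size);
  // ...and double once more if the load-factor cutoff is still too small.
  if (size > CalculateHiCutoff(capacity)) capacity *= 2;
  return std::max(capacity, kMinTableSize);
}

int main() {
  // E.g. 13 elements: bit_ceil gives 16, but the cutoff for 16 buckets is 12,
  // so the copy path sizes the table to 32 buckets up front.
  std::printf("capacity for 13 elements: %zu\n", CalculateCapacityForSize(13));
}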
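
Note on the two-phase copy (a simplified model, not protobuf's real node or table layout): CloneFromOther walks the source map once and threads each cloned node onto a singly linked list through its `next` pointer, and MergeIntoEmpty then sizes the bucket array exactly once and splices every node into its bucket. Because the nodes come from a map, their keys are already unique, so no find-before-insert is needed; because the table is presized, no rehash happens mid-copy. A toy stand-in (ownership and cleanup elided):

#include <cstddef>
#include <cstdio>
#include <functional>
#include <string>
#include <vector>

// Toy chained hash table; the function names mirror the patch, but the data
// structures are simplified stand-ins for protobuf's internal node layout.
struct Node {
  int key;
  std::string value;
  Node* next = nullptr;
};

struct ToyMap {
  std::vector<Node*> buckets;  // power-of-two number of chained buckets
  size_t size = 0;

  // Phase 1: clone every element of `other` into a plain linked list.
  // No hashing, sizing, or duplicate checks are needed here.
  static Node* CloneToList(const ToyMap& other) {
    Node* head = nullptr;
    for (Node* b : other.buckets) {
      for (Node* n = b; n != nullptr; n = n->next) {
        head = new Node{n->key, n->value, head};
      }
    }
    return head;
  }

  // Phase 2: size the table once for `num_nodes` elements, then splice each
  // node into its bucket. Keys are known to be unique, so no lookups.
  void MergeIntoEmpty(Node* head, size_t num_nodes) {
    size_t cap = 2;
    while (cap * 3 / 4 < num_nodes) cap *= 2;  // stand-in load-factor rule
    buckets.assign(cap, nullptr);
    size = num_nodes;
    while (head != nullptr) {
      Node* node = head;
      head = head->next;
      size_t b = std::hash<int>{}(node->key) & (cap - 1);
      node->next = buckets[b];
      buckets[b] = node;
    }
  }
};

int main() {
  ToyMap src;
  src.MergeIntoEmpty(new Node{1, "one", new Node{2, "two"}}, 2);
  ToyMap copy;
  copy.MergeIntoEmpty(ToyMap::CloneToList(src), src.size);
  std::printf("copied %zu elements\n", copy.size);
}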
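
From the user's side nothing changes: copy construction, copy assignment, and cross-arena swap of google::protobuf::Map keep their semantics and simply avoid the incremental growth and per-key lookups of the old insert(begin, end) path. A small usage example, assuming the protobuf headers and library are available to build against:

#include <cstdint>
#include <string>

#include "google/protobuf/map.h"

int main() {
  google::protobuf::Map<int32_t, std::string> src;
  for (int i = 0; i < 1000; ++i) src[i] = "value";

  // Copy construction and copy assignment now reserve the right number of
  // buckets up front instead of growing and rehashing as elements arrive.
  google::protobuf::Map<int32_t, std::string> copy(src);
  google::protobuf::Map<int32_t, std::string> assigned;
  assigned = src;

  // swap() keeps its semantics; when the two maps live on different arenas it
  // now clones into the destination arena and bulk-inserts instead of building
  // a temporary Map (heap-only maps like these take the pointer-swap fast path).
  copy.swap(assigned);
  return copy.size() == 1000 && assigned.size() == 1000 ? 0 : 1;
}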