diff --git a/CHANGELOG.md b/CHANGELOG.md index 114bca03521..af0bc16fbbf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ * (PR [#????](https://github.com/realm/realm-core/pull/????)) * Cut the runtime of aggregate operations on large dictionaries in half ([PR #5864](https://github.com/realm/realm-core/pull/5864)). * Improve performance of aggregate operations on collections of objects by 2x to 10x ([PR #5864](https://github.com/realm/realm-core/pull/5864)). +* Greatly improve the performance of sorting or distincting a Dictionary's keys or values. The most expensive operation is now performed O(log N) rather than O(N log N) times, and large Dictionaries can see upwards of 99% reduction in time to sort. ([PR #5166](https://github.com/realm/realm-core/pulls/5166)) ### Fixed * ([#????](https://github.com/realm/realm-core/issues/????), since v?.?.?) diff --git a/src/realm/dictionary.cpp b/src/realm/dictionary.cpp index bdcebacc9ff..3972991a6e1 100644 --- a/src/realm/dictionary.cpp +++ b/src/realm/dictionary.cpp @@ -491,27 +491,49 @@ void Dictionary::align_indices(std::vector& indices) const } } -void Dictionary::sort(std::vector& indices, bool ascending) const +namespace { +std::vector get_keys(const Dictionary& dict) +{ + std::vector values; + values.reserve(dict.size()); + for (auto it = dict.begin(), end = dict.end(); it != end; ++it) + values.push_back(it.key()); + return values; +} + +std::vector get_values(const Dictionary& dict) +{ + std::vector values; + values.reserve(dict.size()); + for (auto it = dict.begin(), end = dict.end(); it != end; ++it) + values.push_back(it.value()); + return values; +} + +void do_sort(std::vector& indices, bool ascending, const std::vector& values) { - align_indices(indices); auto b = indices.begin(); auto e = indices.end(); - std::sort(b, e, [this, ascending](size_t i1, size_t i2) { - auto v1 = get_any(i1); - auto v2 = get_any(i2); - return ascending ? v1 < v2 : v2 < v1; + std::sort(b, e, [ascending, &values](size_t i1, size_t i2) { + return ascending ? values[i1] < values[i2] : values[i2] < values[i1]; }); } +} // anonymous namespace -void Dictionary::distinct(std::vector& indices, util::Optional ascending) const +void Dictionary::sort(std::vector& indices, bool ascending) const { align_indices(indices); + do_sort(indices, ascending, get_values(*this)); +} - bool sort_ascending = ascending ? *ascending : true; - sort(indices, sort_ascending); +void Dictionary::distinct(std::vector& indices, util::Optional ascending) const +{ + align_indices(indices); + auto values = get_values(*this); + do_sort(indices, ascending.value_or(true), values); indices.erase(std::unique(indices.begin(), indices.end(), - [this](size_t i1, size_t i2) { - return get_any(i1) == get_any(i2); + [&values](size_t i1, size_t i2) { + return values[i1] == values[i2]; }), indices.end()); @@ -524,13 +546,7 @@ void Dictionary::distinct(std::vector& indices, util::Optional asc void Dictionary::sort_keys(std::vector& indices, bool ascending) const { align_indices(indices); - auto b = indices.begin(); - auto e = indices.end(); - std::sort(b, e, [this, ascending](size_t i1, size_t i2) { - auto k1 = get_key(i1); - auto k2 = get_key(i2); - return ascending ? k1 < k2 : k2 < k1; - }); + do_sort(indices, ascending, get_keys(*this)); } void Dictionary::distinct_keys(std::vector& indices, util::Optional) const @@ -1112,34 +1128,38 @@ Dictionary::Iterator::Iterator(const Dictionary* dict, size_t pos) { } -auto Dictionary::Iterator::operator*() const -> value_type +Mixed Dictionary::Iterator::key() const { - update(); - Mixed key; switch (m_key_type) { case type_String: { ArrayString keys(m_tree.get_alloc()); ref_type ref = to_ref(Array::get(m_leaf.get_mem().get_addr(), 1)); keys.init_from_ref(ref); - key = Mixed(keys.get(m_state.m_current_index)); - break; + return Mixed(keys.get(m_state.m_current_index)); } case type_Int: { ArrayInteger keys(m_tree.get_alloc()); ref_type ref = to_ref(Array::get(m_leaf.get_mem().get_addr(), 1)); keys.init_from_ref(ref); - key = Mixed(keys.get(m_state.m_current_index)); - break; + return Mixed(keys.get(m_state.m_current_index)); } default: throw std::runtime_error("Not implemented"); - break; } +} + +Mixed Dictionary::Iterator::value() const +{ ArrayMixed values(m_tree.get_alloc()); ref_type ref = to_ref(Array::get(m_leaf.get_mem().get_addr(), 2)); values.init_from_ref(ref); + return values.get(m_state.m_current_index); +} - return std::make_pair(key, values.get(m_state.m_current_index)); +auto Dictionary::Iterator::operator*() const -> value_type +{ + update(); + return std::make_pair(key(), value()); } diff --git a/src/realm/dictionary.hpp b/src/realm/dictionary.hpp index 34d374815a9..2891f200029 100644 --- a/src/realm/dictionary.hpp +++ b/src/realm/dictionary.hpp @@ -219,6 +219,9 @@ class Dictionary::Iterator : public ClusterTree::Iterator { using ClusterTree::Iterator::get_position; + Mixed key() const; + Mixed value() const; + private: friend class Dictionary; diff --git a/test/benchmark-common-tasks/main.cpp b/test/benchmark-common-tasks/main.cpp index 1ce031bf9ef..5dcd02cd015 100644 --- a/test/benchmark-common-tasks/main.cpp +++ b/test/benchmark-common-tasks/main.cpp @@ -1224,7 +1224,7 @@ struct BenchmarkSortInt : BenchmarkWithInts { struct BenchmarkSortIntList : Benchmark { const char* name() const { - return "BenchmarkSortIntList"; + return "SortIntList"; } void before_all(DBRef group) @@ -1267,12 +1267,9 @@ struct BenchmarkSortIntList : Benchmark { }; struct BenchmarkSortIntDictionary : Benchmark { - // Dictionary sorting is sufficiently slow that we want to use a smaller size - static const size_t size = BASE_SIZE / 10; - const char* name() const { - return "BenchmarkSortIntDictionary"; + return "SortIntDictionary"; } void before_all(DBRef group) @@ -1285,11 +1282,11 @@ struct BenchmarkSortIntDictionary : Benchmark { Dictionary dict = obj.get_dictionary(m_col); Random r; - for (size_t i = 0; i < size; ++i) { + for (size_t i = 0; i < BASE_SIZE; ++i) { dict.insert(util::to_string(i), r.draw_int()); } tr.commit(); - m_indices.reserve(size); + m_indices.reserve(BASE_SIZE); } void after_all(DBRef db)