diff --git a/CHANGELOG.md b/CHANGELOG.md index 589e5ec9f2e..2893150115e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ * Add support for building with Xcode 14 using the CMake project ([PR #5577](https://github.com/realm/realm-core/pull/5577)). * Expose MongoDB client interface in the C API. ([PR #5638](https://github.com/realm/realm-core/pull/5638)). * Add support in the C API for constructing a new `realm_app_t` object via `realm_app_create`. ([PR #5570](https://github.com/realm/realm-core/issues/5570)) +* Greatly improve the performance of sorting or distincting a Dictionary's keys or values. The most expensive operation is now performed O(log N) rather than O(N log N) times, and large Dictionaries can see upwards of 99% reduction in time to sort. ([PR #5166](https://github.com/realm/realm-core/pulls/5166)) ### Fixed * ([#????](https://github.com/realm/realm-core/issues/????), since v?.?.?) diff --git a/src/realm/dictionary.cpp b/src/realm/dictionary.cpp index 458f07d652e..7d04acf2e9c 100644 --- a/src/realm/dictionary.cpp +++ b/src/realm/dictionary.cpp @@ -451,27 +451,49 @@ void Dictionary::align_indices(std::vector& indices) const } } -void Dictionary::sort(std::vector& indices, bool ascending) const +namespace { +std::vector get_keys(const Dictionary& dict) +{ + std::vector values; + values.reserve(dict.size()); + for (auto it = dict.begin(), end = dict.end(); it != end; ++it) + values.push_back(it.key()); + return values; +} + +std::vector get_values(const Dictionary& dict) +{ + std::vector values; + values.reserve(dict.size()); + for (auto it = dict.begin(), end = dict.end(); it != end; ++it) + values.push_back(it.value()); + return values; +} + +void do_sort(std::vector& indices, bool ascending, const std::vector& values) { - align_indices(indices); auto b = indices.begin(); auto e = indices.end(); - std::sort(b, e, [this, ascending](size_t i1, size_t i2) { - auto v1 = get_any(i1); - auto v2 = get_any(i2); - return ascending ? v1 < v2 : v2 < v1; + std::sort(b, e, [ascending, &values](size_t i1, size_t i2) { + return ascending ? values[i1] < values[i2] : values[i2] < values[i1]; }); } +} // anonymous namespace -void Dictionary::distinct(std::vector& indices, util::Optional ascending) const +void Dictionary::sort(std::vector& indices, bool ascending) const { align_indices(indices); + do_sort(indices, ascending, get_values(*this)); +} - bool sort_ascending = ascending ? *ascending : true; - sort(indices, sort_ascending); +void Dictionary::distinct(std::vector& indices, util::Optional ascending) const +{ + align_indices(indices); + auto values = get_values(*this); + do_sort(indices, ascending.value_or(true), values); indices.erase(std::unique(indices.begin(), indices.end(), - [this](size_t i1, size_t i2) { - return get_any(i1) == get_any(i2); + [&values](size_t i1, size_t i2) { + return values[i1] == values[i2]; }), indices.end()); @@ -484,13 +506,7 @@ void Dictionary::distinct(std::vector& indices, util::Optional asc void Dictionary::sort_keys(std::vector& indices, bool ascending) const { align_indices(indices); - auto b = indices.begin(); - auto e = indices.end(); - std::sort(b, e, [this, ascending](size_t i1, size_t i2) { - auto k1 = get_key(i1); - auto k2 = get_key(i2); - return ascending ? k1 < k2 : k2 < k1; - }); + do_sort(indices, ascending, get_keys(*this)); } void Dictionary::distinct_keys(std::vector& indices, util::Optional) const @@ -1072,34 +1088,38 @@ Dictionary::Iterator::Iterator(const Dictionary* dict, size_t pos) { } -auto Dictionary::Iterator::operator*() const -> value_type +Mixed Dictionary::Iterator::key() const { - update(); - Mixed key; switch (m_key_type) { case type_String: { ArrayString keys(m_tree.get_alloc()); ref_type ref = to_ref(Array::get(m_leaf.get_mem().get_addr(), 1)); keys.init_from_ref(ref); - key = Mixed(keys.get(m_state.m_current_index)); - break; + return Mixed(keys.get(m_state.m_current_index)); } case type_Int: { ArrayInteger keys(m_tree.get_alloc()); ref_type ref = to_ref(Array::get(m_leaf.get_mem().get_addr(), 1)); keys.init_from_ref(ref); - key = Mixed(keys.get(m_state.m_current_index)); - break; + return Mixed(keys.get(m_state.m_current_index)); } default: throw std::runtime_error("Not implemented"); - break; } +} + +Mixed Dictionary::Iterator::value() const +{ ArrayMixed values(m_tree.get_alloc()); ref_type ref = to_ref(Array::get(m_leaf.get_mem().get_addr(), 2)); values.init_from_ref(ref); + return values.get(m_state.m_current_index); +} - return std::make_pair(key, values.get(m_state.m_current_index)); +auto Dictionary::Iterator::operator*() const -> value_type +{ + update(); + return std::make_pair(key(), value()); } diff --git a/src/realm/dictionary.hpp b/src/realm/dictionary.hpp index 7ea1f5762ce..e9127834f8a 100644 --- a/src/realm/dictionary.hpp +++ b/src/realm/dictionary.hpp @@ -221,6 +221,9 @@ class Dictionary::Iterator : public ClusterTree::Iterator { using ClusterTree::Iterator::get_position; + Mixed key() const; + Mixed value() const; + private: friend class Dictionary; diff --git a/test/benchmark-common-tasks/main.cpp b/test/benchmark-common-tasks/main.cpp index 1ce031bf9ef..5dcd02cd015 100644 --- a/test/benchmark-common-tasks/main.cpp +++ b/test/benchmark-common-tasks/main.cpp @@ -1224,7 +1224,7 @@ struct BenchmarkSortInt : BenchmarkWithInts { struct BenchmarkSortIntList : Benchmark { const char* name() const { - return "BenchmarkSortIntList"; + return "SortIntList"; } void before_all(DBRef group) @@ -1267,12 +1267,9 @@ struct BenchmarkSortIntList : Benchmark { }; struct BenchmarkSortIntDictionary : Benchmark { - // Dictionary sorting is sufficiently slow that we want to use a smaller size - static const size_t size = BASE_SIZE / 10; - const char* name() const { - return "BenchmarkSortIntDictionary"; + return "SortIntDictionary"; } void before_all(DBRef group) @@ -1285,11 +1282,11 @@ struct BenchmarkSortIntDictionary : Benchmark { Dictionary dict = obj.get_dictionary(m_col); Random r; - for (size_t i = 0; i < size; ++i) { + for (size_t i = 0; i < BASE_SIZE; ++i) { dict.insert(util::to_string(i), r.draw_int()); } tr.commit(); - m_indices.reserve(size); + m_indices.reserve(BASE_SIZE); } void after_all(DBRef db)