diff --git a/CHANGELOG.md b/CHANGELOG.md index 5a28b2da927..f23d8da8489 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ * Expose `realm_object_get_parent` in the C API (PR [#5851](https://github.com/realm/realm-core/pull/5851)) * Expose `realm_list_find` in the C API (PR [#5848](https://github.com/realm/realm-core/pull/5848)) * Expose `Group::remove_table` in the C API (PR [#5860](https://github.com/realm/realm-core/pull/5860)) +* Cut the runtime of aggregate operations on large dictionaries in half ([PR #5864](https://github.com/realm/realm-core/pull/5864)). +* Improve performance of aggregate operations on collections of objects by 2x to 10x ([PR #5864](https://github.com/realm/realm-core/pull/5864)). ### Fixed * ([#????](https://github.com/realm/realm-core/issues/????), since v?.?.?) @@ -23,7 +25,7 @@ * Throw exception if `Realm::Convert` tries to convert to flexible sync. ([#5798](https://github.com/realm/realm-core/issues/5798), since v11.16.0). ### Breaking changes -* None. +* The typed aggregation functions (e.g. `minimum_int`) on `Table`, `TableView`, and `Query` have been removed and replaced with simpler untyped versions which return `Mixed`. This does not affect SDKs which only used them via the Object Store types. ### Compatibility * Fileformat: Generates files with format v22. Reads and automatically upgrade from fileformat v5. 
diff --git a/src/realm/CMakeLists.txt b/src/realm/CMakeLists.txt index 6f211fb0a51..d1b40710fe9 100644 --- a/src/realm/CMakeLists.txt +++ b/src/realm/CMakeLists.txt @@ -265,6 +265,39 @@ set(REALM_INSTALL_HEADERS metrics/transaction_info.hpp ) # REALM_INSTALL_HEADERS +set(REALM_NOINST_HEADERS + link_translator.hpp + object_converter.hpp + query_conditions_tpl.hpp + util/cf_str.hpp + util/circular_buffer.hpp + util/cli_args.hpp + util/copy_dir_recursive.hpp + util/demangle.hpp + util/duplicating_logger.hpp + util/enum.hpp + util/ez_websocket.hpp + util/file_is_regular.hpp + util/from_chars.hpp + util/get_file_size.hpp + util/json_parser.hpp + util/load_file.hpp + util/network_ssl.hpp + util/parent_dir.hpp + util/platform_info.hpp + util/quote.hpp + util/random.hpp + util/resource_limits.hpp + util/scratch_allocator.hpp + util/signal_blocker.hpp + util/substitute.hpp + util/thread_exec_guard.hpp + util/time.hpp + util/timestamp_formatter.hpp + util/timestamp_logger.hpp + util/value_reset_guard.hpp +) # REALM_NOINST_HEADERS + list(APPEND REALM_SOURCES metrics/metrics.cpp metrics/metric_timer.cpp @@ -279,6 +312,7 @@ add_library(Storage STATIC ${REALM_SOURCES} ${UTIL_SOURCES} ${REALM_INSTALL_HEADERS} + ${REALM_NOINST_HEADERS} ${REALM_OBJECT_FILES} $ ) diff --git a/src/realm/dictionary.cpp b/src/realm/dictionary.cpp index df4876f2445..bdcebacc9ff 100644 --- a/src/realm/dictionary.cpp +++ b/src/realm/dictionary.cpp @@ -393,28 +393,67 @@ size_t Dictionary::find_any_key(Mixed key) const noexcept return m_clusters->get_ndx_with_key(get_internal_obj_key(key), key); } +namespace { +bool can_minmax(DataType type) +{ + switch (type) { + case type_Int: + case type_Float: + case type_Double: + case type_Decimal: + case type_Mixed: + case type_Timestamp: + return true; + default: + return false; + } +} +bool can_sum(DataType type) +{ + switch (type) { + case type_Int: + case type_Float: + case type_Double: + case type_Decimal: + case type_Mixed: + return true; + default: + return 
false; + } +} +} // anonymous namespace + util::Optional Dictionary::min(size_t* return_ndx) const { + if (!can_minmax(get_value_data_type())) { + return std::nullopt; + } if (update()) { return m_clusters->min(return_ndx); } if (return_ndx) - *return_ndx = realm::npos; + *return_ndx = realm::not_found; return Mixed{}; } util::Optional Dictionary::max(size_t* return_ndx) const { + if (!can_minmax(get_value_data_type())) { + return std::nullopt; + } if (update()) { return m_clusters->max(return_ndx); } if (return_ndx) - *return_ndx = realm::npos; + *return_ndx = realm::not_found; return Mixed{}; } util::Optional Dictionary::sum(size_t* return_cnt) const { + if (!can_sum(get_value_data_type())) { + return std::nullopt; + } if (update()) { return m_clusters->sum(return_cnt, get_value_data_type()); } @@ -425,6 +464,9 @@ util::Optional Dictionary::sum(size_t* return_cnt) const util::Optional Dictionary::avg(size_t* return_cnt) const { + if (!can_sum(get_value_data_type())) { + return std::nullopt; + } if (update()) { return m_clusters->avg(return_cnt, get_value_data_type()); } @@ -1124,14 +1166,6 @@ ObjKey DictionaryLinkValues::get_key(size_t ndx) const return {}; } -// In contrast to a link list and a link set, a dictionary can contain null links. -// This is because the corresponding key may contain useful information by itself. 
-bool DictionaryLinkValues::is_obj_valid(size_t ndx) const noexcept -{ - Mixed val = m_source.get_any(ndx); - return val.is_type(type_TypedLink); -} - Obj DictionaryLinkValues::get_object(size_t row_ndx) const { Mixed val = m_source.get_any(row_ndx); diff --git a/src/realm/dictionary.hpp b/src/realm/dictionary.hpp index fc96e1a118a..34d374815a9 100644 --- a/src/realm/dictionary.hpp +++ b/src/realm/dictionary.hpp @@ -239,7 +239,6 @@ class DictionaryLinkValues final : public ObjCollectionBase { // Overrides of ObjList: ObjKey get_key(size_t ndx) const final; - bool is_obj_valid(size_t ndx) const noexcept final; Obj get_object(size_t row_ndx) const final; // Overrides of CollectionBase, these simply forward to the underlying dictionary. diff --git a/src/realm/list.hpp b/src/realm/list.hpp index 70e4583587f..a5489fef6db 100644 --- a/src/realm/list.hpp +++ b/src/realm/list.hpp @@ -414,11 +414,6 @@ class LnkLst final : public ObjCollectionBase { void swap(size_t ndx1, size_t ndx2) final; // Overriding members of ObjList: - bool is_obj_valid(size_t) const noexcept final - { - // A link list cannot contain null values - return true; - } Obj get_object(size_t ndx) const final { ObjKey key = this->get(ndx); diff --git a/src/realm/mixed.cpp b/src/realm/mixed.cpp index 8703cfd5030..ad6a5737fc2 100644 --- a/src/realm/mixed.cpp +++ b/src/realm/mixed.cpp @@ -222,7 +222,7 @@ bool Mixed::accumulate_numeric_to(Decimal128& destination) const return did_accumulate; } -int Mixed::compare(const Mixed& b) const +int Mixed::compare(const Mixed& b) const noexcept { // Observe! 
Changing this function breaks the file format for Set diff --git a/src/realm/mixed.hpp b/src/realm/mixed.hpp index c1f5c8a0ccf..0349f68ff45 100644 --- a/src/realm/mixed.hpp +++ b/src/realm/mixed.hpp @@ -209,32 +209,32 @@ class Mixed { bool is_unresolved_link() const; bool is_same_type(const Mixed& b) const; // Will use utf8_compare for strings - int compare(const Mixed& b) const; + int compare(const Mixed& b) const noexcept; // Will compare strings as arrays of signed chars int compare_signed(const Mixed& b) const; - bool operator==(const Mixed& other) const + friend bool operator==(const Mixed& a, const Mixed& b) noexcept { - return compare(other) == 0; + return a.compare(b) == 0; } - bool operator!=(const Mixed& other) const + friend bool operator!=(const Mixed& a, const Mixed& b) noexcept { - return compare(other) != 0; + return a.compare(b) != 0; } - bool operator<(const Mixed& other) const + friend bool operator<(const Mixed& a, const Mixed& b) noexcept { - return compare(other) < 0; + return a.compare(b) < 0; } - bool operator>(const Mixed& other) const + friend bool operator>(const Mixed& a, const Mixed& b) noexcept { - return compare(other) > 0; + return a.compare(b) > 0; } - bool operator<=(const Mixed& other) const + friend bool operator<=(const Mixed& a, const Mixed& b) noexcept { - return compare(other) <= 0; + return a.compare(b) <= 0; } - bool operator>=(const Mixed& other) const + friend bool operator>=(const Mixed& a, const Mixed& b) noexcept { - return compare(other) >= 0; + return a.compare(b) >= 0; } Mixed operator+(const Mixed&) const; diff --git a/src/realm/obj.hpp b/src/realm/obj.hpp index b36359ac942..ad1b0c9be83 100644 --- a/src/realm/obj.hpp +++ b/src/realm/obj.hpp @@ -94,7 +94,7 @@ enum class UpdateStatus { // 'Object' would have been a better name, but it clashes with a class in ObjectStore class Obj { public: - Obj() + constexpr Obj() : m_table(nullptr) , m_row_ndx(size_t(-1)) , m_storage_version(-1) diff --git a/src/realm/obj_list.hpp 
b/src/realm/obj_list.hpp index 35f46434f64..6ea00cabb53 100644 --- a/src/realm/obj_list.hpp +++ b/src/realm/obj_list.hpp @@ -34,7 +34,6 @@ class ObjList { virtual size_t size() const = 0; virtual TableRef get_target_table() const = 0; virtual ObjKey get_key(size_t ndx) const = 0; - virtual bool is_obj_valid(size_t ndx) const noexcept = 0; virtual Obj get_object(size_t row_ndx) const = 0; virtual void sync_if_needed() const = 0; virtual void get_dependencies(TableVersions&) const = 0; @@ -64,18 +63,13 @@ class ObjList { { return get_object(ndx); } - virtual Obj try_get_object(size_t row_ndx) const noexcept - { - REALM_ASSERT(row_ndx < size()); - return is_obj_valid(row_ndx) ? get_object(row_ndx) : Obj(); - } template void for_each(F func) const { auto sz = size(); for (size_t i = 0; i < sz; i++) { - auto o = try_get_object(i); + auto o = get_object(i); if (o && func(o) == IteratorControl::Stop) return; } @@ -86,7 +80,7 @@ class ObjList { { auto sz = size(); for (size_t i = 0; i < sz; i++) { - auto o = try_get_object(i); + auto o = get_object(i); if (o) { T v = o.get(column_key); if (v == value) diff --git a/src/realm/object-store/collection.cpp b/src/realm/object-store/collection.cpp index 09d0ac951fe..fd9dd2a8bc9 100644 --- a/src/realm/object-store/collection.cpp +++ b/src/realm/object-store/collection.cpp @@ -198,6 +198,25 @@ Results Collection::snapshot() const return as_results().snapshot(); } +std::optional Collection::max(ColKey col) const +{ + return as_results().max(col); +} + +util::Optional Collection::min(ColKey col) const +{ + return as_results().min(col); +} + +Mixed Collection::sum(ColKey col) const +{ + return *as_results().sum(col); +} + +util::Optional Collection::average(ColKey col) const +{ + return as_results().average(col); +} NotificationToken Collection::add_notification_callback(CollectionChangeCallback callback, KeyPathArray key_path_array) & diff --git a/src/realm/object-store/collection.hpp b/src/realm/object-store/collection.hpp index 
7bb43240d42..fe970f00e39 100644 --- a/src/realm/object-store/collection.hpp +++ b/src/realm/object-store/collection.hpp @@ -101,6 +101,15 @@ class Collection { Results sort(SortDescriptor order) const; Results sort(std::vector> const& keypaths) const; + // Get the min/max/average/sum of the given column + // All but sum() returns none when collection is empty, and sum() returns 0 + // Throws UnsupportedColumnTypeException for sum/average on timestamp or non-numeric column + // Throws OutOfBoundsIndexException for an out-of-bounds column + util::Optional max(ColKey column = {}) const; + util::Optional min(ColKey column = {}) const; + util::Optional average(ColKey column = {}) const; + Mixed sum(ColKey column = {}) const; + /** * Adds a `CollectionChangeCallback` to this `Collection`. The `CollectionChangeCallback` is exectuted when * insertions, modifications or deletions happen on this `Collection`. diff --git a/src/realm/object-store/list.cpp b/src/realm/object-store/list.cpp index 36076272072..7fbc5f3d97f 100644 --- a/src/realm/object-store/list.cpp +++ b/src/realm/object-store/list.cpp @@ -295,89 +295,6 @@ Results List::filter(Query q) const return Results(m_realm, std::dynamic_pointer_cast(m_coll_base), get_query().and_query(std::move(q))); } -template -struct HasMinmaxType : std::false_type { -}; -template -struct HasMinmaxType::minmax_type>> : std::true_type { -}; - -template -struct HasSumType : std::false_type { -}; -template -struct HasSumType::sum_type>> : std::true_type { -}; - -template -struct If; - -template <> -struct If { - template - static auto call(T self, Then&& fn, Else&&) - { - return fn(self); - } -}; -template <> -struct If { - template - static auto call(T, Then&&, Else&& fn) - { - return fn(); - } -}; - -util::Optional List::max(ColKey col) const -{ - if (get_type() == PropertyType::Object) - return as_results().max(col); - size_t out_ndx = not_found; - auto result = list_base().max(&out_ndx); - if (!result) { - throw 
Results::UnsupportedColumnTypeException(list_base().get_col_key(), list_base().get_table(), "max"); - } - return out_ndx == not_found ? none : result; -} - -util::Optional List::min(ColKey col) const -{ - if (get_type() == PropertyType::Object) - return as_results().min(col); - - size_t out_ndx = not_found; - auto result = list_base().min(&out_ndx); - if (!result) { - throw Results::UnsupportedColumnTypeException(list_base().get_col_key(), list_base().get_table(), "min"); - } - return out_ndx == not_found ? none : result; -} - -Mixed List::sum(ColKey col) const -{ - if (get_type() == PropertyType::Object) - return *as_results().sum(col); - - auto result = list_base().sum(); - if (!result) { - throw Results::UnsupportedColumnTypeException(list_base().get_col_key(), list_base().get_table(), "sum"); - } - return *result; -} - -util::Optional List::average(ColKey col) const -{ - if (get_type() == PropertyType::Object) - return as_results().average(col); - size_t count = 0; - auto result = list_base().avg(&count); - if (!result) { - throw Results::UnsupportedColumnTypeException(list_base().get_col_key(), list_base().get_table(), "average"); - } - return count == 0 ? none : result; -} - bool List::operator==(List const& rgt) const noexcept { return list_base().get_table() == rgt.list_base().get_table() && diff --git a/src/realm/object-store/list.hpp b/src/realm/object-store/list.hpp index fb2b6b25688..1f12b2c43a4 100644 --- a/src/realm/object-store/list.hpp +++ b/src/realm/object-store/list.hpp @@ -86,16 +86,6 @@ class List : public object_store::Collection { // Equivalent to producing a thread-safe reference and resolving it in the frozen realm. 
List freeze(std::shared_ptr const& frozen_realm) const; - // Get the min/max/average/sum of the given column - // All but sum() returns none when there are zero matching rows - // sum() returns 0, - // Throws UnsupportedColumnTypeException for sum/average on timestamp or non-numeric column - // Throws OutOfBoundsIndexException for an out-of-bounds column - util::Optional max(ColKey column = {}) const; - util::Optional min(ColKey column = {}) const; - util::Optional average(ColKey column = {}) const; - Mixed sum(ColKey column = {}) const; - bool operator==(List const& rgt) const noexcept; template diff --git a/src/realm/object-store/results.cpp b/src/realm/object-store/results.cpp index cfd1eb35bd8..1f64c2274e5 100644 --- a/src/realm/object-store/results.cpp +++ b/src/realm/object-store/results.cpp @@ -403,7 +403,7 @@ util::Optional Results::try_get(size_t row_ndx) case Mode::TableView: if (row_ndx >= m_table_view.size()) break; - return m_table_view.try_get_object(row_ndx); + return m_table_view.get_object(row_ndx); } return util::none; } @@ -547,286 +547,24 @@ size_t Results::index_of(Query&& q) return row ? 
index_of(const_cast(*m_table).get_object(row)) : not_found; } -DataType Results::prepare_for_aggregate(ColKey column, const char* name) -{ - DataType type; - switch (m_mode) { - case Mode::Table: - type = m_table->get_column_type(column); - break; - case Mode::Collection: - type = m_collection->get_table()->get_column_type(m_collection->get_col_key()); - if (type != type_LinkList && type != type_Link) - break; - m_query = do_get_query(); - m_mode = Mode::Query; - REALM_FALLTHROUGH; - case Mode::Query: - case Mode::TableView: - ensure_up_to_date(); - type = m_table->get_column_type(column); - break; - default: - REALM_COMPILER_HINT_UNREACHABLE(); - } - switch (type) { - case type_Timestamp: - case type_Double: - case type_Float: - case type_Int: - case type_Decimal: - case type_Mixed: - break; - default: - if (m_mode == Mode::Collection) { - throw UnsupportedColumnTypeException(m_collection->get_col_key(), m_collection->get_table(), name); - } - else { - throw UnsupportedColumnTypeException{column, *m_table, name}; - } - } - return type; -} - namespace { -template -struct AggregateHelper; - -template -struct AggregateHelper { - Table& table; - Mixed min(ColKey col, ObjKey* obj) - { - return table.minimum_int(col, obj); - } - Mixed max(ColKey col, ObjKey* obj) - { - return table.maximum_int(col, obj); - } - Mixed sum(ColKey col) - { - return table.sum_int(col); - } - Mixed avg(ColKey col, size_t* count) - { - return table.average_int(col, count); - } -}; - -template -struct AggregateHelper { - Table& table; - Mixed min(ColKey col, ObjKey* obj) - { - return table.minimum_double(col, obj); - } - Mixed max(ColKey col, ObjKey* obj) - { - return table.maximum_double(col, obj); - } - Mixed sum(ColKey col) - { - return table.sum_double(col); - } - Mixed avg(ColKey col, size_t* count) - { - return table.average_double(col, count); - } -}; - -template -struct AggregateHelper { - Table& table; - Mixed min(ColKey col, ObjKey* obj) - { - return table.minimum_float(col, obj); - } 
- Mixed max(ColKey col, ObjKey* obj) +struct CollectionAggregateAdaptor { + const CollectionBase& list; + util::Optional min(ColKey) { - return table.maximum_float(col, obj); + return list.min(); } - Mixed sum(ColKey col) - { - return table.sum_float(col); - } - Mixed avg(ColKey col, size_t* count) - { - return table.average_float(col, count); - } -}; - -template -struct AggregateHelper { - Table& table; - Mixed min(ColKey col, ObjKey* obj) - { - return table.minimum_timestamp(col, obj); - } - Mixed max(ColKey col, ObjKey* obj) - { - return table.maximum_timestamp(col, obj); - } - Mixed sum(ColKey col) - { - throw Results::UnsupportedColumnTypeException{col, table, "sum"}; - } - Mixed avg(ColKey col, size_t*) - { - throw Results::UnsupportedColumnTypeException{col, table, "avg"}; - } -}; - -template -struct AggregateHelper { - Table& table; - Mixed min(ColKey col, ObjKey* obj) - { - return table.minimum_decimal(col, obj); - } - Mixed max(ColKey col, ObjKey* obj) - { - return table.maximum_decimal(col, obj); - } - Mixed sum(ColKey col) - { - return table.sum_decimal(col); - } - Mixed avg(ColKey col, size_t* count) - { - return table.average_decimal(col, count); - } -}; - -template -struct AggregateHelper { - Table& table; - Mixed min(ColKey col, ObjKey* obj) + util::Optional max(ColKey) { - return table.minimum_mixed(col, obj); - } - Mixed max(ColKey col, ObjKey* obj) - { - return table.maximum_mixed(col, obj); - } - Mixed sum(ColKey col) - { - return table.sum_mixed(col); - } - Mixed avg(ColKey col, size_t* count) - { - return table.average_mixed(col, count); - } -}; - - -struct ListAggregateHelper { - CollectionBase& list; - util::Optional min(ColKey, size_t* ndx) - { - return list.min(ndx); - } - util::Optional max(ColKey, size_t* ndx) - { - return list.max(ndx); + return list.max(); } util::Optional sum(ColKey) { return list.sum(); } - util::Optional avg(ColKey, size_t* count) - { - return list.avg(count); - } -}; - -template <> -struct AggregateHelper : 
ListAggregateHelper { - AggregateHelper(CollectionBase& l) - : ListAggregateHelper{l} + util::Optional avg(ColKey) { - } -}; -template <> -struct AggregateHelper : ListAggregateHelper { - AggregateHelper(CollectionBase& l) - : ListAggregateHelper{l} - { - } -}; -template <> -struct AggregateHelper : ListAggregateHelper { - AggregateHelper(CollectionBase& l) - : ListAggregateHelper{l} - { - } -}; -template <> -struct AggregateHelper : ListAggregateHelper { - AggregateHelper(CollectionBase& l) - : ListAggregateHelper{l} - { - } -}; - -template <> -struct AggregateHelper : ListAggregateHelper { - AggregateHelper(CollectionBase& l) - : ListAggregateHelper{l} - { - } - Mixed sum(ColKey) - { - throw Results::UnsupportedColumnTypeException{list.get_col_key(), *list.get_table(), "sum"}; - } - Mixed avg(ColKey, size_t*) - { - throw Results::UnsupportedColumnTypeException{list.get_col_key(), *list.get_table(), "avg"}; - } -}; - -template <> -struct AggregateHelper : ListAggregateHelper { - AggregateHelper(CollectionBase& l) - : ListAggregateHelper{l} - { - } -}; - -template -util::Optional call_with_helper(Func&& func, Table&& table, DataType type) -{ - switch (type) { - case type_Timestamp: - return func(AggregateHelper{table}); - case type_Double: - return func(AggregateHelper{table}); - case type_Float: - return func(AggregateHelper{table}); - case type_Int: - return func(AggregateHelper{table}); - case type_Decimal: - return func(AggregateHelper{table}); - case type_Mixed: - return func(AggregateHelper{table}); - default: - REALM_COMPILER_HINT_UNREACHABLE(); - } -} - -struct ReturnIndexHelper { - ObjKey key; - size_t index = npos; - operator ObjKey*() - { - return &key; - } - operator size_t*() - { - return &index; - } - operator bool() - { - return key || index != npos; + return list.avg(); } }; } // anonymous namespace @@ -839,49 +577,70 @@ util::Optional Results::aggregate(ColKey column, const char* name, Aggreg if (!m_table && !m_collection) return none; - auto type 
= prepare_for_aggregate(column, name); + ensure_up_to_date(); + std::optional ret; switch (m_mode) { case Mode::Table: - return call_with_helper(func, *m_table, type); + ret = func(*m_table); + break; + case Mode::Query: + ret = func(m_query); + break; case Mode::Collection: - return call_with_helper(func, *m_collection, type); + if (do_get_type() != PropertyType::Object) + ret = func(CollectionAggregateAdaptor{*m_collection}); + else + ret = func(do_get_query()); + break; default: - return call_with_helper(func, m_table_view, type); + ret = func(m_table_view); + break; + } + + // `none` indicates that it's an unsupported operation for the column type. + // `some(null)` indicates that there's no rows in the thing being aggregated + // Any other value is just the result to return + if (ret) { + return ret->is_null() ? std::nullopt : std::optional(*ret); + } + + // We need to report the column and table actually being aggregated on, + // which is the collection if it's not a link collection and the target + // of the links otherwise + if (m_mode == Mode::Collection && do_get_type() != PropertyType::Object) { + throw UnsupportedColumnTypeException(m_collection->get_col_key(), m_collection->get_table(), name); + } + else { + throw UnsupportedColumnTypeException{column, *m_table, name}; } } util::Optional Results::max(ColKey column) { - ReturnIndexHelper return_ndx; - auto results = aggregate(column, "max", [&](auto&& helper) { - return helper.max(column, return_ndx); + return aggregate(column, "max", [column](auto&& helper) { + return helper.max(column); }); - return return_ndx ? results : none; } util::Optional Results::min(ColKey column) { - ReturnIndexHelper return_ndx; - auto results = aggregate(column, "min", [&](auto&& helper) { - return helper.min(column, return_ndx); + return aggregate(column, "min", [column](auto&& helper) { + return helper.min(column); }); - return return_ndx ? 
results : none; } util::Optional Results::sum(ColKey column) { - return aggregate(column, "sum", [&](auto&& helper) { + return aggregate(column, "sum", [column](auto&& helper) { return helper.sum(column); }); } util::Optional Results::average(ColKey column) { - size_t value_count = 0; - auto results = aggregate(column, "avg", [&](auto&& helper) { - return helper.avg(column, &value_count); + return aggregate(column, "avg", [column](auto&& helper) { + return helper.avg(column); }); - return value_count == 0 ? none : results; } void Results::clear() @@ -983,11 +742,9 @@ Query Results::do_get_query() const if (const_cast(m_query).get_table()) return m_query; - // A TableView has an associated Query if it was produced by Query::find_all. This is indicated - // by TableView::get_query returning a Query with a non-null table. - Query query = m_table_view.get_query(); - if (query.get_table()) { - return query; + // A TableView has an associated Query if it was produced by Query::find_all + if (auto& query = m_table_view.get_query()) { + return *query; } // The TableView has no associated query so create one with no conditions that is restricted @@ -995,7 +752,7 @@ Query Results::do_get_query() const if (m_update_policy == UpdatePolicy::Auto) { m_table_view.sync_if_needed(); } - return Query(m_table, std::unique_ptr(new TableView(m_table_view))); + return Query(m_table, std::make_unique(m_table_view)); } case Mode::Collection: if (auto list = dynamic_cast(m_collection.get())) { diff --git a/src/realm/object-store/results.hpp b/src/realm/object-store/results.hpp index 5d261b4d455..f8d1cd979ef 100644 --- a/src/realm/object-store/results.hpp +++ b/src/realm/object-store/results.hpp @@ -399,7 +399,6 @@ class Results { template util::Optional aggregate(ColKey column, const char* name, AggregateFunction&& func) REQUIRES(!m_mutex); - DataType prepare_for_aggregate(ColKey column, const char* name) REQUIRES(m_mutex); template auto dispatch(Fn&&) const REQUIRES(!m_mutex); diff --git 
a/src/realm/object-store/set.cpp b/src/realm/object-store/set.cpp index 4a26d86c3ac..92c814d44fe 100644 --- a/src/realm/object-store/set.cpp +++ b/src/realm/object-store/set.cpp @@ -84,55 +84,6 @@ std::pair Set::remove(const T& value) return as().erase(value); } -util::Optional Set::max(ColKey col) const -{ - if (get_type() == PropertyType::Object) - return as_results().max(col); - size_t out_ndx = not_found; - auto result = set_base().max(&out_ndx); - if (!result) { - throw Results::UnsupportedColumnTypeException(set_base().get_col_key(), set_base().get_table(), "max"); - } - return out_ndx == not_found ? none : result; -} - -util::Optional Set::min(ColKey col) const -{ - if (get_type() == PropertyType::Object) - return as_results().min(col); - - size_t out_ndx = not_found; - auto result = set_base().min(&out_ndx); - if (!result) { - throw Results::UnsupportedColumnTypeException(set_base().get_col_key(), set_base().get_table(), "min"); - } - return out_ndx == not_found ? none : result; -} - -Mixed Set::sum(ColKey col) const -{ - if (get_type() == PropertyType::Object) - return *as_results().sum(col); - - auto result = set_base().sum(); - if (!result) { - throw Results::UnsupportedColumnTypeException(set_base().get_col_key(), set_base().get_table(), "sum"); - } - return *result; -} - -util::Optional Set::average(ColKey col) const -{ - if (get_type() == PropertyType::Object) - return as_results().average(col); - size_t count = 0; - auto result = set_base().avg(&count); - if (!result) { - throw Results::UnsupportedColumnTypeException(set_base().get_col_key(), set_base().get_table(), "average"); - } - return count == 0 ? 
none : result; -} - bool Set::operator==(const Set& rgt) const noexcept { return set_base().get_table() == rgt.set_base().get_table() && diff --git a/src/realm/object-store/set.hpp b/src/realm/object-store/set.hpp index 8663a894ca5..edcb59faa0e 100644 --- a/src/realm/object-store/set.hpp +++ b/src/realm/object-store/set.hpp @@ -91,16 +91,6 @@ class Set : public Collection { Set freeze(const std::shared_ptr& realm) const; - // Get the min/max/average/sum of the given column - // All but sum() returns none when there are zero matching rows - // sum() returns 0, - // Throws UnsupportedColumnTypeException for sum/average on timestamp or non-numeric column - // Throws OutOfBoundsIndexException for an out-of-bounds column - util::Optional max(ColKey column = {}) const; - util::Optional min(ColKey column = {}) const; - util::Optional average(ColKey column = {}) const; - Mixed sum(ColKey column = {}) const; - bool is_subset_of(const Collection& rhs) const; bool is_strict_subset_of(const Collection& rhs) const; bool is_superset_of(const Collection& rhs) const; diff --git a/src/realm/query.cpp b/src/realm/query.cpp index a92dded20e5..af939ecd055 100644 --- a/src/realm/query.cpp +++ b/src/realm/query.cpp @@ -22,16 +22,15 @@ #include #include #include +#include #include #include #include -#include #include #include #include - using namespace realm; using namespace realm::metrics; @@ -949,7 +948,7 @@ bool Query::eval_object(const Obj& obj) const template -void Query::aggregate(QueryStateBase& st, ColKey column_key, size_t* resultcount, ObjKey* return_ndx) const +void Query::aggregate(QueryStateBase& st, ColKey column_key) const { using LeafType = typename ColumnTypeTraits::cluster_leaf_type; @@ -1008,14 +1007,6 @@ void Query::aggregate(QueryStateBase& st, ColKey column_key, size_t* resultcount }); } } - - if (resultcount) { - *resultcount = st.match_count(); - } - - if (return_ndx) { - *return_ndx = ObjKey(st.m_minmax_key); - } } size_t Query::find_best_node(ParentNode* pn) 
const @@ -1064,260 +1055,40 @@ void Query::aggregate_internal(ParentNode* pn, QueryStateBase* st, size_t start, } } +// Aggregates -// Sum - -int64_t Query::sum_int(ColKey column_key) const -{ -#if REALM_METRICS - std::unique_ptr metric_timer = QueryInfo::track(this, QueryInfo::type_Sum); -#endif - - QueryStateSum st; - if (m_table->is_nullable(column_key)) { - aggregate>(st, column_key); - } - else { - aggregate(st, column_key); - } - return st.result_sum(); -} -double Query::sum_float(ColKey column_key) const -{ -#if REALM_METRICS - std::unique_ptr metric_timer = QueryInfo::track(this, QueryInfo::type_Sum); -#endif - - QueryStateSum st; - aggregate(st, column_key); - return st.result_sum(); -} -double Query::sum_double(ColKey column_key) const -{ -#if REALM_METRICS - std::unique_ptr metric_timer = QueryInfo::track(this, QueryInfo::type_Sum); -#endif - QueryStateSum st; - aggregate(st, column_key); - return st.result_sum(); -} - -Decimal128 Query::sum_decimal128(ColKey column_key) const -{ -#if REALM_METRICS - std::unique_ptr metric_timer = QueryInfo::track(this, QueryInfo::type_Sum); -#endif - - QueryStateSum st; - aggregate(st, column_key); - return st.result_sum(); -} - -Decimal128 Query::sum_mixed(ColKey column_key) const -{ -#if REALM_METRICS - std::unique_ptr metric_timer = QueryInfo::track(this, QueryInfo::type_Sum); -#endif - - QueryStateSum st; - aggregate(st, column_key); - return st.result_sum(); -} - -// Maximum - -int64_t Query::maximum_int(ColKey column_key, ObjKey* return_ndx) const -{ -#if REALM_METRICS - std::unique_ptr metric_timer = QueryInfo::track(this, QueryInfo::type_Maximum); -#endif - - QueryStateMax st; - if (m_table->is_nullable(column_key)) { - aggregate>(st, column_key, nullptr, return_ndx); - } - else { - aggregate(st, column_key, nullptr, return_ndx); - } - return st.get_max(); -} - -float Query::maximum_float(ColKey column_key, ObjKey* return_ndx) const -{ -#if REALM_METRICS - std::unique_ptr metric_timer = QueryInfo::track(this, 
QueryInfo::type_Maximum); -#endif - - QueryStateMax st; - aggregate(st, column_key, nullptr, return_ndx); - return st.get_max(); -} -double Query::maximum_double(ColKey column_key, ObjKey* return_ndx) const -{ -#if REALM_METRICS - std::unique_ptr metric_timer = QueryInfo::track(this, QueryInfo::type_Maximum); -#endif - - QueryStateMax st; - aggregate(st, column_key, nullptr, return_ndx); - return st.get_max(); -} - -Decimal128 Query::maximum_decimal128(ColKey column_key, ObjKey* return_ndx) const -{ -#if REALM_METRICS - std::unique_ptr metric_timer = QueryInfo::track(this, QueryInfo::type_Maximum); -#endif - - QueryStateMax st; - aggregate(st, column_key, nullptr, return_ndx); - return st.get_max(); -} - -Mixed Query::maximum_mixed(ColKey column_key, ObjKey* return_ndx) const -{ -#if REALM_METRICS - std::unique_ptr metric_timer = QueryInfo::track(this, QueryInfo::type_Maximum); -#endif - - QueryStateMax st; - aggregate(st, column_key, nullptr, return_ndx); - return st.get_max(); -} - -Timestamp Query::maximum_timestamp(ColKey column_key, ObjKey* return_ndx) -{ -#if REALM_METRICS - std::unique_ptr metric_timer = QueryInfo::track(this, QueryInfo::type_Maximum); -#endif - - QueryStateMax st; - aggregate(st, column_key, nullptr, return_ndx); - return st.get_max(); -} - -// Minimum - -int64_t Query::minimum_int(ColKey column_key, ObjKey* return_ndx) const -{ -#if REALM_METRICS - std::unique_ptr metric_timer = QueryInfo::track(this, QueryInfo::type_Minimum); -#endif - - QueryStateMin st; - if (m_table->is_nullable(column_key)) { - aggregate>(st, column_key, nullptr, return_ndx); - } - else { - aggregate(st, column_key, nullptr, return_ndx); - } - return st.get_min(); -} -float Query::minimum_float(ColKey column_key, ObjKey* return_ndx) const -{ -#if REALM_METRICS - std::unique_ptr metric_timer = QueryInfo::track(this, QueryInfo::type_Minimum); -#endif - - QueryStateMin st; - aggregate(st, column_key, nullptr, return_ndx); - return st.get_min(); -} -double 
Query::minimum_double(ColKey column_key, ObjKey* return_ndx) const -{ -#if REALM_METRICS - std::unique_ptr metric_timer = QueryInfo::track(this, QueryInfo::type_Minimum); -#endif - - QueryStateMin st; - aggregate(st, column_key, nullptr, return_ndx); - return st.get_min(); -} - -Timestamp Query::minimum_timestamp(ColKey column_key, ObjKey* return_ndx) +std::optional Query::sum(ColKey col_key) const { #if REALM_METRICS - std::unique_ptr metric_timer = QueryInfo::track(this, QueryInfo::type_Minimum); + auto metric_timer = QueryInfo::track(this, QueryInfo::type_Sum); #endif - - QueryStateMin st; - aggregate(st, column_key, nullptr, return_ndx); - return st.get_min(); + return AggregateHelper::sum(*m_table, *this, col_key); } -Decimal128 Query::minimum_decimal128(ColKey column_key, ObjKey* return_ndx) const +std::optional Query::avg(ColKey col_key, size_t* value_count) const { #if REALM_METRICS - std::unique_ptr metric_timer = QueryInfo::track(this, QueryInfo::type_Minimum); + auto metric_timer = QueryInfo::track(this, QueryInfo::type_Average); #endif - - QueryStateMin st; - aggregate(st, column_key, nullptr, return_ndx); - return st.get_min(); + return AggregateHelper::avg(*m_table, *this, col_key, value_count); } -Mixed Query::minimum_mixed(ColKey column_key, ObjKey* return_ndx) const +std::optional Query::min(ColKey col_key, ObjKey* return_ndx) const { #if REALM_METRICS - std::unique_ptr metric_timer = QueryInfo::track(this, QueryInfo::type_Minimum); + auto metric_timer = QueryInfo::track(this, QueryInfo::type_Minimum); #endif - - QueryStateMin st; - aggregate(st, column_key, nullptr, return_ndx); - return st.get_min(); + return AggregateHelper::min(*m_table, *this, col_key, return_ndx); } -// Average - -template -R Query::average(ColKey column_key, size_t* resultcount) const +std::optional Query::max(ColKey col_key, ObjKey* return_ndx) const { #if REALM_METRICS - std::unique_ptr metric_timer = QueryInfo::track(this, QueryInfo::type_Average); + auto metric_timer = 
QueryInfo::track(this, QueryInfo::type_Maximum); #endif - size_t resultcount2 = 0; - QueryStateSum::type> st; - aggregate(st, column_key, &resultcount2); - R sum1 = R(st.result_sum()); - R avg1{}; - if (resultcount2 != 0) - avg1 = sum1 / resultcount2; - if (resultcount) - *resultcount = resultcount2; - return avg1; + return AggregateHelper::max(*m_table, *this, col_key, return_ndx); } -double Query::average_int(ColKey column_key, size_t* resultcount) const -{ - if (m_table->is_nullable(column_key)) { - return average>(column_key, resultcount); - } - return average(column_key, resultcount); -} -double Query::average_float(ColKey column_key, size_t* resultcount) const -{ - return average(column_key, resultcount); -} -double Query::average_double(ColKey column_key, size_t* resultcount) const -{ - return average(column_key, resultcount); -} -Decimal128 Query::average_decimal128(ColKey column_key, size_t* resultcount) const -{ -#if REALM_METRICS - std::unique_ptr metric_timer = QueryInfo::track(this, QueryInfo::type_Average); -#endif - return average(column_key, resultcount); -} -Decimal128 Query::average_mixed(ColKey column_key, size_t* resultcount) const -{ -#if REALM_METRICS - std::unique_ptr metric_timer = QueryInfo::track(this, QueryInfo::type_Average); -#endif - return average(column_key, resultcount); -} - - // Grouping Query& Query::group() { @@ -1418,7 +1189,7 @@ ObjKey Query::find() const if (m_view) { size_t sz = m_view->size(); for (size_t i = 0; i < sz; i++) { - const Obj obj = m_view->try_get_object(i); + const Obj obj = m_view->get_object(i); if (eval_object(obj)) { return obj.get_key(); } @@ -1457,7 +1228,7 @@ void Query::do_find_all(TableView& ret, size_t limit) const if (m_view) { size_t sz = m_view->size(); for (size_t t = 0; t < sz && ret.size() < limit; t++) { - const Obj obj = m_view->try_get_object(t); + const Obj obj = m_view->get_object(t); if (eval_object(obj)) { ret.m_key_values.add(obj.get_key()); } diff --git a/src/realm/query.hpp 
b/src/realm/query.hpp index fc45606f8ce..2a512aa9cad 100644 --- a/src/realm/query.hpp +++ b/src/realm/query.hpp @@ -257,28 +257,14 @@ class Query final { size_t count() const; TableView find_all(const DescriptorOrdering& descriptor) const; size_t count(const DescriptorOrdering& descriptor) const; - int64_t sum_int(ColKey column_key) const; - double average_int(ColKey column_key, size_t* resultcount = nullptr) const; - int64_t maximum_int(ColKey column_key, ObjKey* return_ndx = nullptr) const; - int64_t minimum_int(ColKey column_key, ObjKey* return_ndx = nullptr) const; - double sum_float(ColKey column_key) const; - double average_float(ColKey column_key, size_t* resultcount = nullptr) const; - float maximum_float(ColKey column_key, ObjKey* return_ndx = nullptr) const; - float minimum_float(ColKey column_key, ObjKey* return_ndx = nullptr) const; - double sum_double(ColKey column_key) const; - double average_double(ColKey column_key, size_t* resultcount = nullptr) const; - double maximum_double(ColKey column_key, ObjKey* return_ndx = nullptr) const; - double minimum_double(ColKey column_key, ObjKey* return_ndx = nullptr) const; - Timestamp maximum_timestamp(ColKey column_key, ObjKey* return_ndx = nullptr); - Timestamp minimum_timestamp(ColKey column_key, ObjKey* return_ndx = nullptr); - Decimal128 sum_decimal128(ColKey column_key) const; - Decimal128 maximum_decimal128(ColKey column_key, ObjKey* return_ndx = nullptr) const; - Decimal128 minimum_decimal128(ColKey column_key, ObjKey* return_ndx = nullptr) const; - Decimal128 average_decimal128(ColKey column_key, size_t* resultcount = nullptr) const; - Decimal128 sum_mixed(ColKey column_key) const; - Mixed maximum_mixed(ColKey column_key, ObjKey* return_ndx = nullptr) const; - Mixed minimum_mixed(ColKey column_key, ObjKey* return_ndx = nullptr) const; - Decimal128 average_mixed(ColKey column_key, size_t* resultcount = nullptr) const; + + // Aggregates return nullopt if the operation is not supported on the given column 
+ // Everything but `sum` returns `some(null)` if there are no non-null values + // Sum returns `some(0)` if there are no non-null values. + std::optional sum(ColKey col_key) const; + std::optional min(ColKey col_key, ObjKey* = nullptr) const; + std::optional max(ColKey col_key, ObjKey* = nullptr) const; + std::optional avg(ColKey col_key, size_t* value_count = nullptr) const; // Deletion size_t remove() const; @@ -358,13 +344,8 @@ class Query final { template Query& add_size_condition(ColKey column_key, int64_t value); - template ::type>::ResultType> - R average(ColKey column_key, size_t* resultcount = nullptr) const; - template - void aggregate(QueryStateBase& st, ColKey column_key, size_t* resultcount = nullptr, - ObjKey* return_ndx = nullptr) const; + void aggregate(QueryStateBase& st, ColKey column_key) const; size_t find_best_node(ParentNode* pn) const; void aggregate_internal(ParentNode* pn, QueryStateBase* st, size_t start, size_t end, @@ -391,6 +372,8 @@ class Query final { friend class SubQueryCount; friend class PrimitiveListCount; friend class metrics::QueryInfo; + template + friend class AggregateHelper; std::string error_code; diff --git a/src/realm/query_conditions_tpl.hpp b/src/realm/query_conditions_tpl.hpp index e6949df4a98..4b9f0634385 100644 --- a/src/realm/query_conditions_tpl.hpp +++ b/src/realm/query_conditions_tpl.hpp @@ -30,11 +30,9 @@ namespace realm { template class QueryStateSum : public QueryStateBase { public: + using Type = T; using ResultType = typename aggregate_operations::Sum::ResultType; - explicit QueryStateSum(size_t limit = -1) - : QueryStateBase(limit) - { - } + using QueryStateBase::QueryStateBase; bool match(size_t, Mixed value) noexcept final { if (!value.is_null()) { @@ -58,13 +56,10 @@ class QueryStateSum : public QueryStateBase { aggregate_operations::Sum::type> m_state; }; -template -class QueryStateMin : public QueryStateBase { +template class State> +class QueryStateMinMax : public QueryStateBase { public: - explicit 
QueryStateMin(size_t limit = -1) - : QueryStateBase(limit) - { - } + using QueryStateBase::QueryStateBase; bool match(size_t index, Mixed value) noexcept final { if (!value.is_null()) { @@ -75,43 +70,146 @@ class QueryStateMin : public QueryStateBase { ++m_match_count; m_minmax_key = (m_key_values ? m_key_values->get(index) : 0) + m_key_offset; } - return (m_limit > m_match_count); + return m_limit > m_match_count; } - R get_min() const + Mixed get_result() const { - return m_state.is_null() ? R{} : m_state.result(); + return m_state.is_null() ? Mixed() : m_state.result(); } private: - aggregate_operations::Minimum::type> m_state; + State::type> m_state; +}; + +template +class QueryStateMin : public QueryStateMinMax { +public: + using QueryStateMinMax::QueryStateMinMax; }; template -class QueryStateMax : public QueryStateBase { +class QueryStateMax : public QueryStateMinMax { public: - explicit QueryStateMax(size_t limit = -1) - : QueryStateBase(limit) + using QueryStateMinMax::QueryStateMinMax; +}; + +template +class AggregateHelper { +public: + static std::optional sum(const Table& table, const Target& target, ColKey col_key) { + switch (table.get_column_type(col_key)) { + case type_Int: + if (table.is_nullable(col_key)) { + return sum>(target, col_key); + } + else { + return sum(target, col_key); + } + case type_Float: + return sum(target, col_key); + case type_Double: + return sum(target, col_key); + case type_Decimal: + return sum(target, col_key); + case type_Mixed: + return sum(target, col_key); + default: + return std::nullopt; + } } - bool match(size_t index, Mixed value) noexcept final + + static std::optional avg(const Table& table, const Target& target, ColKey col_key, size_t* value_count) { - if (!value.is_null()) { - auto v = value.get(); - if (!m_state.accumulate(v)) { - return true; // no match, continue search - } - ++m_match_count; - m_minmax_key = (m_key_values ? 
m_key_values->get(index) : 0) + m_key_offset; + switch (table.get_column_type(col_key)) { + case type_Int: + if (table.is_nullable(col_key)) { + return average>(target, col_key, value_count); + } + else { + return average(target, col_key, value_count); + } + case type_Float: + return average(target, col_key, value_count); + case type_Double: + return average(target, col_key, value_count); + case type_Decimal: + return average(target, col_key, value_count); + case type_Mixed: + return average(target, col_key, value_count); + default: + return std::nullopt; } - return (m_limit > m_match_count); } - R get_max() const + + static std::optional min(const Table& table, const Target& target, ColKey col_key, ObjKey* return_ndx) + { + return minmax(table, target, col_key, return_ndx); + } + + static std::optional max(const Table& table, const Target& target, ColKey col_key, ObjKey* return_ndx) { - return m_state.is_null() ? R{} : m_state.result(); + return minmax(table, target, col_key, return_ndx); } private: - aggregate_operations::Maximum::type> m_state; + template + static Mixed average(const Target& target, ColKey col_key, size_t* value_count) + { + QueryStateSum::type> st; + target.template aggregate(st, col_key); + if (value_count) + *value_count = st.result_count(); + if (st.result_count() == 0) + return Mixed(); + + using Result = typename aggregate_operations::Average::type>::ResultType; + return Result(st.result_sum()) / st.result_count(); + } + + template + static Mixed sum(const Target& target, ColKey col_key) + { + QueryStateSum::type> st; + target.template aggregate(st, col_key); + return st.result_sum(); + } + + template