Skip to content

Commit

Permalink
Merge pull request #5864 from realm/tg/aggregates
Browse files Browse the repository at this point in the history
Replace the typed aggregate functions with simpler untyped ones
  • Loading branch information
tgoyne authored Sep 16, 2022
2 parents 001911c + 000dcd9 commit 26639e5
Show file tree
Hide file tree
Showing 41 changed files with 1,189 additions and 1,982 deletions.
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
* Expose `realm_object_get_parent` in the C API (PR [#5851](https://github.com/realm/realm-core/pull/5851))
* Expose `realm_list_find` in the C API (PR [#5848](https://github.com/realm/realm-core/pull/5848))
* Expose `Group::remove_table` in the C API (PR [#5860](https://github.com/realm/realm-core/pull/5860))
* Cut the runtime of aggregate operations on large dictionaries in half ([PR #5864](https://github.com/realm/realm-core/pull/5864)).
* Improve performance of aggregate operations on collections of objects by 2x to 10x ([PR #5864](https://github.com/realm/realm-core/pull/5864)).

### Fixed
* <How do the end-user experience this issue? what was the impact?> ([#????](https://github.com/realm/realm-core/issues/????), since v?.?.?)
Expand All @@ -23,7 +25,7 @@
* Throw exception if `Realm::Convert` tries to convert to flexible sync. ([#5798](https://github.com/realm/realm-core/issues/5798), since v11.16.0).

### Breaking changes
* None.
* The typed aggregation functions (e.g. `minimum_int`) on `Table`, `TableView`, and `Query` have been removed and replaced with simpler untyped versions which return `Mixed`. This does not affect SDKs which only used them via the Object Store types.

### Compatibility
* Fileformat: Generates files with format v22. Reads and automatically upgrade from fileformat v5.
Expand Down
34 changes: 34 additions & 0 deletions src/realm/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,39 @@ set(REALM_INSTALL_HEADERS
metrics/transaction_info.hpp
) # REALM_INSTALL_HEADERS

set(REALM_NOINST_HEADERS
link_translator.hpp
object_converter.hpp
query_conditions_tpl.hpp
util/cf_str.hpp
util/circular_buffer.hpp
util/cli_args.hpp
util/copy_dir_recursive.hpp
util/demangle.hpp
util/duplicating_logger.hpp
util/enum.hpp
util/ez_websocket.hpp
util/file_is_regular.hpp
util/from_chars.hpp
util/get_file_size.hpp
util/json_parser.hpp
util/load_file.hpp
util/network_ssl.hpp
util/parent_dir.hpp
util/platform_info.hpp
util/quote.hpp
util/random.hpp
util/resource_limits.hpp
util/scratch_allocator.hpp
util/signal_blocker.hpp
util/substitute.hpp
util/thread_exec_guard.hpp
util/time.hpp
util/timestamp_formatter.hpp
util/timestamp_logger.hpp
util/value_reset_guard.hpp
) # REALM_NOINST_HEADERS

list(APPEND REALM_SOURCES
metrics/metrics.cpp
metrics/metric_timer.cpp
Expand All @@ -279,6 +312,7 @@ add_library(Storage STATIC
${REALM_SOURCES}
${UTIL_SOURCES}
${REALM_INSTALL_HEADERS}
${REALM_NOINST_HEADERS}
${REALM_OBJECT_FILES}
$<TARGET_OBJECTS:Bid>
)
Expand Down
54 changes: 44 additions & 10 deletions src/realm/dictionary.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -393,28 +393,67 @@ size_t Dictionary::find_any_key(Mixed key) const noexcept
return m_clusters->get_ndx_with_key(get_internal_obj_key(key), key);
}

namespace {

// Reports whether min()/max() are defined for dictionary values of `type`.
// Accepted: Int, Float, Double, Decimal, Mixed and Timestamp; anything else is rejected.
bool can_minmax(DataType type)
{
    bool supported = false;
    switch (type) {
        case type_Int:
        case type_Float:
        case type_Double:
        case type_Decimal:
        case type_Mixed:
        case type_Timestamp:
            supported = true;
            break;
        default:
            break;
    }
    return supported;
}

// Reports whether sum()/avg() are defined for dictionary values of `type`.
// Same set as can_minmax() except that Timestamp is not accepted.
bool can_sum(DataType type)
{
    bool supported = false;
    switch (type) {
        case type_Int:
        case type_Float:
        case type_Double:
        case type_Decimal:
        case type_Mixed:
            supported = true;
            break;
        default:
            break;
    }
    return supported;
}

} // anonymous namespace

// Computes the minimum value stored in the dictionary.
// Returns std::nullopt when the value type is rejected by can_minmax().
// When update() reports true, the work is delegated to m_clusters->min(), which also receives return_ndx.
// Otherwise a default-constructed Mixed is returned and, if return_ndx is non-null, it is set to a
// "no index" sentinel.
util::Optional<Mixed> Dictionary::min(size_t* return_ndx) const
{
if (!can_minmax(get_value_data_type())) {
return std::nullopt;
}
if (update()) {
return m_clusters->min(return_ndx);
}
// NOTE(review): the two assignments below look like the removed (npos) and added (not_found) sides of
// a single diff line; as written only the first is guarded by the `if` — confirm against the real file.
if (return_ndx)
*return_ndx = realm::npos;
*return_ndx = realm::not_found;
return Mixed{};
}

// Computes the maximum value stored in the dictionary.
// Returns std::nullopt when the value type is rejected by can_minmax().
// When update() reports true, the work is delegated to m_clusters->max(), which also receives return_ndx.
// Otherwise a default-constructed Mixed is returned and, if return_ndx is non-null, it is set to a
// "no index" sentinel.
util::Optional<Mixed> Dictionary::max(size_t* return_ndx) const
{
if (!can_minmax(get_value_data_type())) {
return std::nullopt;
}
if (update()) {
return m_clusters->max(return_ndx);
}
// NOTE(review): the two assignments below look like the removed (npos) and added (not_found) sides of
// a single diff line; as written only the first is guarded by the `if` — confirm against the real file.
if (return_ndx)
*return_ndx = realm::npos;
*return_ndx = realm::not_found;
return Mixed{};
}

util::Optional<Mixed> Dictionary::sum(size_t* return_cnt) const
{
if (!can_sum(get_value_data_type())) {
return std::nullopt;
}
if (update()) {
return m_clusters->sum(return_cnt, get_value_data_type());
}
Expand All @@ -425,6 +464,9 @@ util::Optional<Mixed> Dictionary::sum(size_t* return_cnt) const

util::Optional<Mixed> Dictionary::avg(size_t* return_cnt) const
{
if (!can_sum(get_value_data_type())) {
return std::nullopt;
}
if (update()) {
return m_clusters->avg(return_cnt, get_value_data_type());
}
Expand Down Expand Up @@ -1124,14 +1166,6 @@ ObjKey DictionaryLinkValues::get_key(size_t ndx) const
return {};
}

// In contrast to a link list and a link set, a dictionary can contain null links.
// This is because the corresponding key may contain useful information by itself.
bool DictionaryLinkValues::is_obj_valid(size_t ndx) const noexcept
{
Mixed val = m_source.get_any(ndx);
return val.is_type(type_TypedLink);
}

Obj DictionaryLinkValues::get_object(size_t row_ndx) const
{
Mixed val = m_source.get_any(row_ndx);
Expand Down
1 change: 0 additions & 1 deletion src/realm/dictionary.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,6 @@ class DictionaryLinkValues final : public ObjCollectionBase<CollectionBase> {

// Overrides of ObjList:
ObjKey get_key(size_t ndx) const final;
bool is_obj_valid(size_t ndx) const noexcept final;
Obj get_object(size_t row_ndx) const final;

// Overrides of CollectionBase, these simply forward to the underlying dictionary.
Expand Down
5 changes: 0 additions & 5 deletions src/realm/list.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -414,11 +414,6 @@ class LnkLst final : public ObjCollectionBase<LstBase> {
void swap(size_t ndx1, size_t ndx2) final;

// Overriding members of ObjList:
bool is_obj_valid(size_t) const noexcept final
{
// A link list cannot contain null values
return true;
}
Obj get_object(size_t ndx) const final
{
ObjKey key = this->get(ndx);
Expand Down
2 changes: 1 addition & 1 deletion src/realm/mixed.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ bool Mixed::accumulate_numeric_to(Decimal128& destination) const
return did_accumulate;
}

int Mixed::compare(const Mixed& b) const
int Mixed::compare(const Mixed& b) const noexcept
{
// Observe! Changing this function breaks the file format for Set<Mixed>

Expand Down
26 changes: 13 additions & 13 deletions src/realm/mixed.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -209,32 +209,32 @@ class Mixed {
bool is_unresolved_link() const;
bool is_same_type(const Mixed& b) const;
// Will use utf8_compare for strings
int compare(const Mixed& b) const;
int compare(const Mixed& b) const noexcept;
// Will compare strings as arrays of signed chars
int compare_signed(const Mixed& b) const;
bool operator==(const Mixed& other) const
friend bool operator==(const Mixed& a, const Mixed& b) noexcept
{
return compare(other) == 0;
return a.compare(b) == 0;
}
bool operator!=(const Mixed& other) const
friend bool operator!=(const Mixed& a, const Mixed& b) noexcept
{
return compare(other) != 0;
return a.compare(b) != 0;
}
bool operator<(const Mixed& other) const
friend bool operator<(const Mixed& a, const Mixed& b) noexcept
{
return compare(other) < 0;
return a.compare(b) < 0;
}
bool operator>(const Mixed& other) const
friend bool operator>(const Mixed& a, const Mixed& b) noexcept
{
return compare(other) > 0;
return a.compare(b) > 0;
}
bool operator<=(const Mixed& other) const
friend bool operator<=(const Mixed& a, const Mixed& b) noexcept
{
return compare(other) <= 0;
return a.compare(b) <= 0;
}
bool operator>=(const Mixed& other) const
friend bool operator>=(const Mixed& a, const Mixed& b) noexcept
{
return compare(other) >= 0;
return a.compare(b) >= 0;
}

Mixed operator+(const Mixed&) const;
Expand Down
2 changes: 1 addition & 1 deletion src/realm/obj.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ enum class UpdateStatus {
// 'Object' would have been a better name, but it clashes with a class in ObjectStore
class Obj {
public:
Obj()
constexpr Obj()
: m_table(nullptr)
, m_row_ndx(size_t(-1))
, m_storage_version(-1)
Expand Down
10 changes: 2 additions & 8 deletions src/realm/obj_list.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ class ObjList {
virtual size_t size() const = 0;
virtual TableRef get_target_table() const = 0;
virtual ObjKey get_key(size_t ndx) const = 0;
virtual bool is_obj_valid(size_t ndx) const noexcept = 0;
virtual Obj get_object(size_t row_ndx) const = 0;
virtual void sync_if_needed() const = 0;
virtual void get_dependencies(TableVersions&) const = 0;
Expand Down Expand Up @@ -64,18 +63,13 @@ class ObjList {
{
return get_object(ndx);
}
virtual Obj try_get_object(size_t row_ndx) const noexcept
{
REALM_ASSERT(row_ndx < size());
return is_obj_valid(row_ndx) ? get_object(row_ndx) : Obj();
}

template <class F>
void for_each(F func) const
{
auto sz = size();
for (size_t i = 0; i < sz; i++) {
auto o = try_get_object(i);
auto o = get_object(i);
if (o && func(o) == IteratorControl::Stop)
return;
}
Expand All @@ -86,7 +80,7 @@ class ObjList {
{
auto sz = size();
for (size_t i = 0; i < sz; i++) {
auto o = try_get_object(i);
auto o = get_object(i);
if (o) {
T v = o.get<T>(column_key);
if (v == value)
Expand Down
19 changes: 19 additions & 0 deletions src/realm/object-store/collection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,25 @@ Results Collection::snapshot() const
return as_results().snapshot();
}

std::optional<Mixed> Collection::max(ColKey col) const
{
return as_results().max(col);
}

// Returns the minimum of `col` over this collection by forwarding to min() on the
// object returned by as_results().
util::Optional<Mixed> Collection::min(ColKey col) const
{
    auto results = as_results();
    return results.min(col);
}

// Returns the sum of `col` over this collection, computed by as_results().sum(col).
// NOTE(review): the result is dereferenced without a check — presumably sum() on the
// results object always holds a value (0 for an empty collection); confirm.
Mixed Collection::sum(ColKey col) const
{
    auto total = as_results().sum(col);
    return *total;
}

// Returns the average of `col` over this collection by forwarding to average() on the
// object returned by as_results().
util::Optional<Mixed> Collection::average(ColKey col) const
{
    auto results = as_results();
    return results.average(col);
}

NotificationToken Collection::add_notification_callback(CollectionChangeCallback callback,
KeyPathArray key_path_array) &
Expand Down
9 changes: 9 additions & 0 deletions src/realm/object-store/collection.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,15 @@ class Collection {
Results sort(SortDescriptor order) const;
Results sort(std::vector<std::pair<std::string, bool>> const& keypaths) const;

// Get the min/max/average/sum of the given column
// All but sum() return none when the collection is empty; sum() returns 0
// Throws UnsupportedColumnTypeException for sum/average on timestamp or non-numeric column
// Throws OutOfBoundsIndexException for an out-of-bounds column
util::Optional<Mixed> max(ColKey column = {}) const;
util::Optional<Mixed> min(ColKey column = {}) const;
util::Optional<Mixed> average(ColKey column = {}) const;
Mixed sum(ColKey column = {}) const;

/**
* Adds a `CollectionChangeCallback` to this `Collection`. The `CollectionChangeCallback` is executed when
* insertions, modifications or deletions happen on this `Collection`.
Expand Down
Loading

0 comments on commit 26639e5

Please sign in to comment.