Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

more efficient columnar deserialization #716

Merged
merged 3 commits into from
Sep 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -233,15 +233,19 @@ template <dataset_type_tag dataset_type_> class Dataset {
}
// Whether the buffer stored at index `i` of `buffers_` is row-based; forwards to the Buffer overload.
constexpr bool is_row_based(Idx const i) const { return is_row_based(buffers_[i]); }
// A buffer is row-based exactly when its `data` pointer is non-null.
constexpr bool is_row_based(Buffer const& buffer) const { return buffer.data != nullptr; }
constexpr bool is_columnar(std::string_view component) const {
constexpr bool is_columnar(std::string_view component, bool with_attribute_buffers = false) const {
Idx const idx = find_component(component, false);
if (idx == invalid_index) {
return false;
}
return is_columnar(idx);
return is_columnar(idx, with_attribute_buffers);
}
// Whether the buffer stored at index `i` of `buffers_` is columnar; forwards to the Buffer overload,
// passing `with_attribute_buffers` through unchanged.
constexpr bool is_columnar(Idx const i, bool with_attribute_buffers = false) const {
return is_columnar(buffers_[i], with_attribute_buffers);
}
// Whether `buffer` is columnar.
//
// A row-based buffer is never columnar. When `with_attribute_buffers` is true, a buffer that is not
// row-based only counts as columnar if its `attributes` list is non-empty; when it is false, the
// attributes are not inspected.
constexpr bool is_columnar(Buffer const& buffer, bool with_attribute_buffers = false) const {
    if (is_row_based(buffer)) {
        return false;
    }
    return !with_attribute_buffers || !buffer.attributes.empty();
}
constexpr bool is_columnar(Idx const i) const { return !is_row_based(i); }
constexpr bool is_columnar(Buffer const& buffer) const { return !is_row_based(buffer); }

Idx find_component(std::string_view component, bool required = false) const {
auto const found = std::ranges::find_if(dataset_info_.component_info, [component](ComponentInfo const& x) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@ concept row_based_or_columnar_c = std::derived_from<T, row_based_t> || std::deri
template <row_based_or_columnar_c T> constexpr bool is_row_based_v = std::derived_from<T, row_based_t>;
template <row_based_or_columnar_c T> constexpr bool is_columnar_v = std::derived_from<T, columnar_t>;

// Obtain the attribute buffers of a columnar dataset buffer, ordered by the provided meta attributes.
//
// If none of the provided meta attributes is present in the provided attribute buffers, the result is empty.
// Otherwise, returns a list of attribute buffers in the same order as the input attributes.
// Each entry is a copy of the matching attribute buffer when one was provided, and an empty attribute buffer otherwise.
template <typename BufferType>
requires requires(BufferType const& b) {
{ b.attributes } -> std::convertible_to<std::vector<AttributeBuffer<typename BufferType::Data>>>;
Expand All @@ -46,6 +51,11 @@ reordered_attribute_buffers(BufferType& buffer, std::span<MetaAttribute const* c
}
return AttributeBuffer<void>{};
});

if (std::ranges::all_of(result, [](auto const& attribute_buffer) { return attribute_buffer.data == nullptr; })) {
result = {};
TonyXiang8787 marked this conversation as resolved.
Show resolved Hide resolved
}

return result;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -703,7 +703,7 @@ class Deserializer {
void parse_component(Idx component_idx) {
if (dataset_handler_.is_row_based(component_idx)) {
parse_component(row_based, component_idx);
} else {
} else if (dataset_handler_.is_columnar(component_idx, true)) {
TonyXiang8787 marked this conversation as resolved.
Show resolved Hide resolved
parse_component(columnar, component_idx);
}
}
Expand All @@ -713,7 +713,7 @@ class Deserializer {
auto const& buffer = dataset_handler_.get_buffer(component_idx);

assert(dataset_handler_.is_row_based(buffer) == detail::is_row_based_v<row_or_column_t>);
assert(dataset_handler_.is_columnar(buffer) == detail::is_columnar_v<row_or_column_t>);
assert(dataset_handler_.is_columnar(buffer, true) == detail::is_columnar_v<row_or_column_t>);
assert(is_row_based(buffer) == detail::is_row_based_v<row_or_column_t>);
assert(is_columnar(buffer) == detail::is_columnar_v<row_or_column_t>);

Expand Down Expand Up @@ -779,23 +779,48 @@ class Deserializer {
if (msg_data.size == 0) {
return;
}

// set offset and skip array header
offset_ = msg_data.offset;
parse_map_array<visit_array_t, move_forward>();

for (element_number_ = 0; element_number_ != msg_data.size; ++element_number_) {
BufferView const element_buffer = advance(buffer_view, element_number_);
// check the element is map or array
auto const element_visitor = parse_map_array<visit_map_array_t, move_forward>();
if (element_visitor.is_map) {
parse_map_element(row_or_column_tag, element_buffer, element_visitor.size, component);
} else {
parse_array_element(row_or_column_tag, element_buffer, element_visitor.size, component, attributes);
}
parse_element(row_or_column_tag, element_buffer, component, attributes);
}
element_number_ = -1;
offset_ = 0;
}

// Parse one element of a row-based component buffer.
//
// Reads the element's container header with the move_forward policy and dispatches on its kind:
// a map goes to parse_map_element, anything else goes to parse_array_element together with the
// expected attribute order.
void parse_element(row_based_t tag, BufferView const& buffer_view, MetaComponent const& component,
                   std::span<MetaAttribute const* const> attributes) {
    assert(is_row_based(buffer_view));

    auto const header = parse_map_array<visit_map_array_t, move_forward>();
    if (!header.is_map) {
        parse_array_element(tag, buffer_view, header.size, component, attributes);
        return;
    }
    parse_map_element(tag, buffer_view, header.size, component);
}

// Parse one element of a columnar component buffer.
//
// The container header is first inspected with the stay_offset policy so that the element can still be
// skipped as a whole. An array element is skipped via parse_skip() when the buffer view has no reordered
// attribute buffers to receive it. In every other case the header is read again with move_forward and
// the element is dispatched to parse_map_element (map) or parse_array_element (array).
void parse_element(columnar_t tag, BufferView const& buffer_view, MetaComponent const& component,
                   std::span<MetaAttribute const* const> attributes) {
    assert(is_columnar(buffer_view));

    auto const peeked = parse_map_array<visit_map_array_t, stay_offset>();

    // array-style element without any target attribute buffers: nothing to store
    if (!peeked.is_map && buffer_view.reordered_attribute_buffers.empty()) {
        parse_skip();
        return;
    }

    // the element will be parsed: read the header again, this time with move_forward
    parse_map_array<visit_map_array_t, move_forward>();
    if (peeked.is_map) {
        parse_map_element(tag, buffer_view, peeked.size, component);
    } else {
        parse_array_element(tag, buffer_view, peeked.size, component, attributes);
    }
}

void parse_map_element(row_based_t tag, BufferView const& buffer_view, Idx map_size,
MetaComponent const& component) {
while (map_size-- != 0) {
Expand All @@ -813,6 +838,8 @@ class Deserializer {

void parse_map_element(columnar_t /*tag*/, BufferView const& buffer_view, Idx map_size,
MetaComponent const& /*component*/) {
assert(!buffer_view.buffer->attributes.empty());

while (map_size-- != 0) {
attribute_key_ = parse_string();
if (auto it = std::ranges::find_if(buffer_view.buffer->attributes,
Expand Down Expand Up @@ -936,7 +963,9 @@ class Deserializer {
assert(buffer_view.buffer != nullptr);
return is_columnar(*buffer_view.buffer);
}
static constexpr bool is_columnar(WritableDataset::Buffer const& buffer) { return buffer.data == nullptr; }
// A writable buffer is treated as columnar when its `data` pointer is null and it has at least one
// entry in `attributes`.
static constexpr bool is_columnar(WritableDataset::Buffer const& buffer) {
    if (buffer.data != nullptr) {
        return false;
    }
    return !buffer.attributes.empty();
}

[[noreturn]] void handle_error(std::exception const& e) {
std::stringstream ss;
Expand Down
Loading