Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implemented arrow data getter / extraction #241

Merged
merged 2 commits into from
Oct 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions include/sparrow/array.hpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2024 Man Group Operations Limited

Check notice on line 1 in include/sparrow/array.hpp

View workflow job for this annotation

GitHub Actions / build

Run clang-format on include/sparrow/array.hpp

File include/sparrow/array.hpp does not conform to Custom style guidelines. (lines 31, 37, 54)
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -36,6 +36,14 @@
SPARROW_API array(ArrowArray&& array, ArrowSchema* schema);
SPARROW_API array(ArrowArray* array, ArrowSchema* schema);

/**
 * Tells whether the arrow proxy of the wrapped layout holds its
 * ArrowArray by value (owned) rather than through a pointer.
 */
SPARROW_API bool owns_arrow_array() const;
/**
 * Stores in the argument a pointer to the ArrowArray held by the arrow
 * proxy of the wrapped layout; nothing is copied or moved out.
 *
 * @return *this, so that calls can be chained.
 */
SPARROW_API array& get_arrow_array(ArrowArray*&);
/**
 * Moves the ArrowArray out of the arrow proxy of the wrapped layout and
 * assigns it to the argument. Only callable on an rvalue array.
 *
 * @return std::move(*this), so that calls can be chained.
 * @throws std::runtime_error if the proxy does not own its ArrowArray.
 */
SPARROW_API array&& extract_arrow_array(ArrowArray&) &&;

/**
 * Tells whether the arrow proxy of the wrapped layout holds its
 * ArrowSchema by value (owned) rather than through a pointer.
 */
SPARROW_API bool owns_arrow_schema() const;
/**
 * Stores in the argument a pointer to the ArrowSchema held by the arrow
 * proxy of the wrapped layout; nothing is copied or moved out.
 *
 * @return *this, so that calls can be chained.
 */
SPARROW_API array& get_arrow_schema(ArrowSchema*&);
/**
 * Moves the ArrowSchema out of the arrow proxy of the wrapped layout and
 * assigns it to the argument. Only callable on an rvalue array.
 *
 * @return std::move(*this), so that calls can be chained.
 * @throws std::runtime_error if the proxy does not own its ArrowSchema.
 */
SPARROW_API array&& extract_arrow_schema(ArrowSchema&) &&;

SPARROW_API size_type size() const;
SPARROW_API const_reference operator[](size_type) const;

Expand Down
5 changes: 5 additions & 0 deletions include/sparrow/arrow_array_schema_proxy.hpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2024 Man Group Operations Limited

Check notice on line 1 in include/sparrow/arrow_array_schema_proxy.hpp

View workflow job for this annotation

GitHub Actions / build

Run clang-format on include/sparrow/arrow_array_schema_proxy.hpp

File include/sparrow/arrow_array_schema_proxy.hpp does not conform to Custom style guidelines. (lines 138)
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -225,9 +225,13 @@
*/
[[nodiscard]] SPARROW_API arrow_proxy view();

/**
 * @return true if the proxy stores an ArrowArray by value, false if it
 *         stores a pointer to one.
 */
[[nodiscard]] SPARROW_API bool owns_array() const;
/**
 * Returns the ArrowArray stored by value in the proxy, replaces it with a
 * value-initialized ArrowArray and calls reset().
 *
 * @throws std::runtime_error if the proxy only stores a pointer to an
 *         ArrowArray.
 */
[[nodiscard]] SPARROW_API ArrowArray extract_array();
/** Mutable access to the ArrowArray referenced or stored by the proxy. */
[[nodiscard]] SPARROW_API ArrowArray& array();
/** Read-only access to the ArrowArray referenced or stored by the proxy. */
[[nodiscard]] SPARROW_API const ArrowArray& array() const;

/**
 * @return true if the proxy stores an ArrowSchema by value, false if it
 *         stores a pointer to one.
 */
[[nodiscard]] SPARROW_API bool owns_schema() const;
/**
 * Returns the ArrowSchema stored by value in the proxy, replaces it with a
 * value-initialized ArrowSchema and calls reset().
 *
 * @throws std::runtime_error if the proxy only stores a pointer to an
 *         ArrowSchema.
 */
[[nodiscard]] SPARROW_API ArrowSchema extract_schema();
/** Mutable access to the ArrowSchema referenced or stored by the proxy. */
[[nodiscard]] SPARROW_API ArrowSchema& schema();
/** Read-only access to the ArrowSchema referenced or stored by the proxy. */
[[nodiscard]] SPARROW_API const ArrowSchema& schema() const;

Expand All @@ -254,6 +258,7 @@
void update_children();
void update_dictionary();
void update_null_count();
void reset();

[[nodiscard]] bool array_created_with_sparrow() const;
[[nodiscard]] bool schema_created_with_sparrow() const;
Expand Down
9 changes: 8 additions & 1 deletion include/sparrow/layout/array_base.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,11 +123,11 @@ namespace sparrow
const_bitmap_iterator bitmap_begin() const;
const_bitmap_iterator bitmap_end() const;


private:

static constexpr std::size_t m_bitmap_buffer_index = 0;

arrow_proxy& get_arrow_proxy();
bitmap_type make_bitmap();

arrow_proxy m_proxy;
Expand All @@ -136,6 +136,8 @@ namespace sparrow
// friend classes
friend class layout_iterator<self_type, false>;
friend class layout_iterator<self_type, true>;
template <class T>
friend class array_wrapper_impl;
};

template <class D>
Expand Down Expand Up @@ -291,6 +293,11 @@ namespace sparrow
return sparrow::next(bitmap_begin(), size());
}

template <class D>
auto array_crtp_base<D>::get_arrow_proxy() -> arrow_proxy&
{
return m_proxy;
}

template <class D>
auto array_crtp_base<D>::make_bitmap() -> bitmap_type
Expand Down
17 changes: 16 additions & 1 deletion include/sparrow/layout/array_wrapper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <memory>
#include <variant>

#include "sparrow/arrow_array_schema_proxy.hpp"
#include "sparrow/types/data_traits.hpp"
#include "sparrow/utils/memory.hpp"

Expand Down Expand Up @@ -68,6 +69,7 @@ namespace sparrow

enum data_type data_type() const;
bool is_dictionary() const;
arrow_proxy& get_arrow_proxy();

protected:

Expand All @@ -78,6 +80,7 @@ namespace sparrow

enum data_type m_data_type;
virtual bool is_dictionary_impl() const = 0;
virtual arrow_proxy& get_arrow_proxy_impl() = 0;
virtual wrapper_ptr clone_impl() const = 0;
};

Expand All @@ -103,6 +106,7 @@ namespace sparrow

array_wrapper_impl(const array_wrapper_impl&);
bool is_dictionary_impl() const override;
arrow_proxy& get_arrow_proxy_impl() override;
wrapper_ptr clone_impl() const override;

using storage_type = std::variant<value_ptr<T>, std::shared_ptr<T>, T*>;
Expand Down Expand Up @@ -135,6 +139,11 @@ namespace sparrow
return is_dictionary_impl();
}

inline arrow_proxy& array_wrapper::get_arrow_proxy()
{
return get_arrow_proxy_impl();
}

inline array_wrapper::array_wrapper(enum data_type dt)
: m_data_type(dt)
{
Expand Down Expand Up @@ -206,7 +215,13 @@ namespace sparrow
{
return detail::is_dictionary_encoded_array<T>::get();
}


template <class T>
arrow_proxy& array_wrapper_impl<T>::get_arrow_proxy_impl()
{
return p_array->get_arrow_proxy();
}

template <class T>
auto array_wrapper_impl<T>::clone_impl() const -> wrapper_ptr
{
Expand Down
11 changes: 11 additions & 0 deletions include/sparrow/layout/dictionary_encoded_array.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,9 +129,14 @@ namespace sparrow
static keys_layout create_keys_layout(arrow_proxy& proxy);
static values_layout create_values_layout(arrow_proxy& proxy);

arrow_proxy& get_arrow_proxy();

arrow_proxy m_proxy;
keys_layout m_keys_layout;
values_layout p_values_layout;

template <class T>
friend class array_wrapper_impl;
};

/*******************************************
Expand Down Expand Up @@ -246,4 +251,10 @@ namespace sparrow
{
return keys_layout{arrow_proxy{&proxy.array(), &proxy.schema()}};
}

template <std::integral IT>
auto dictionary_encoded_array<IT>::get_arrow_proxy() -> arrow_proxy&
{
return m_proxy;
}
}
10 changes: 10 additions & 0 deletions include/sparrow/layout/null_array.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,12 @@ namespace sparrow

difference_type ssize() const;

arrow_proxy& get_arrow_proxy();

arrow_proxy m_proxy;

template <class T>
friend class array_wrapper_impl;
};

bool operator==(const null_array& lhs, const null_array& rhs);
Expand Down Expand Up @@ -230,6 +235,11 @@ namespace sparrow
return static_cast<difference_type>(size());
}

inline arrow_proxy& null_array::get_arrow_proxy()
{
return m_proxy;
}

inline bool operator==(const null_array& lhs, const null_array& rhs)
{
return lhs.size() == rhs.size();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ namespace sparrow
SPARROW_API static acc_length_ptr_variant_type get_acc_lengths_ptr(const array_wrapper& ar);
SPARROW_API std::uint64_t get_run_length(std::uint64_t run_index) const;

arrow_proxy& get_arrow_proxy();

arrow_proxy m_proxy;
std::uint64_t m_encoded_length;

Expand All @@ -85,6 +87,8 @@ namespace sparrow
// friend classes
friend class run_encoded_array_iterator<false>;
friend class run_encoded_array_iterator<true>;
template <class T>
friend class array_wrapper_impl;
};

inline run_end_encoded_array::run_end_encoded_array(arrow_proxy proxy)
Expand Down Expand Up @@ -120,6 +124,11 @@ namespace sparrow
return ret;
}

inline arrow_proxy& run_end_encoded_array::get_arrow_proxy()
{
return m_proxy;
}

inline auto run_end_encoded_array::operator[](std::uint64_t i) -> array_traits::const_reference
{
return static_cast<const run_end_encoded_array*>(this)->operator[](i);
Expand Down
14 changes: 12 additions & 2 deletions include/sparrow/layout/union_array.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,13 +83,17 @@ namespace sparrow
using type_id_map = std::array<std::uint8_t, 256>;
static type_id_map parse_type_id_map(std::string_view format_string);

arrow_proxy& get_arrow_proxy();

arrow_proxy m_proxy;
const std::uint8_t * p_type_ids;
std::vector<cloning_ptr<array_wrapper>> m_children;

// map from type-id to child-index
std::array<std::uint8_t, 256> m_type_id_map;


template <class T>
friend class array_wrapper_impl;
};

class dense_union_array : public union_array_crtp_base<dense_union_array>
Expand Down Expand Up @@ -128,6 +132,12 @@ namespace sparrow
return ret;
}

template <class DERIVED>
arrow_proxy& union_array_crtp_base<DERIVED>::get_arrow_proxy()
{
return m_proxy;
}

template <class DERIVED>
union_array_crtp_base<DERIVED>::union_array_crtp_base(arrow_proxy proxy)
: m_proxy(std::move(proxy)),
Expand Down Expand Up @@ -221,4 +231,4 @@ namespace sparrow
{
return i + m_proxy.offset();
}
}
}
36 changes: 35 additions & 1 deletion src/array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,41 @@ namespace sparrow
: p_array(array_factory(arrow_proxy(array, schema)))
{
}


// Reports whether the arrow proxy of the wrapped layout owns its
// ArrowArray, as answered by arrow_proxy::owns_array().
bool array::owns_arrow_array() const
{
    const auto& proxy = p_array->get_arrow_proxy();
    return proxy.owns_array();
}

// Makes dst point at the ArrowArray held by the arrow proxy of the wrapped
// layout. Nothing is copied or extracted. Returns *this to allow chaining.
array& array::get_arrow_array(ArrowArray*& dst)
{
    auto& proxy = p_array->get_arrow_proxy();
    ArrowArray& held = proxy.array();
    dst = &held;
    return *this;
}

// Assigns to dst the ArrowArray extracted from the arrow proxy of the
// wrapped layout (see arrow_proxy::extract_array()), then returns this
// object as an rvalue so that further rvalue-qualified calls can be chained.
array&& array::extract_arrow_array(ArrowArray& dst) &&
{
    auto& proxy = p_array->get_arrow_proxy();
    dst = proxy.extract_array();
    return std::move(*this);
}

// Reports whether the arrow proxy of the wrapped layout owns its
// ArrowSchema, as answered by arrow_proxy::owns_schema().
bool array::owns_arrow_schema() const
{
    const auto& proxy = p_array->get_arrow_proxy();
    return proxy.owns_schema();
}

// Makes dst point at the ArrowSchema held by the arrow proxy of the wrapped
// layout. Nothing is copied or extracted. Returns *this to allow chaining.
array& array::get_arrow_schema(ArrowSchema*& dst)
{
    auto& proxy = p_array->get_arrow_proxy();
    ArrowSchema& held = proxy.schema();
    dst = &held;
    return *this;
}

// Assigns to dst the ArrowSchema extracted from the arrow proxy of the
// wrapped layout (see arrow_proxy::extract_schema()), then returns this
// object as an rvalue so that further rvalue-qualified calls can be chained.
array&& array::extract_arrow_schema(ArrowSchema& dst) &&
{
    auto& proxy = p_array->get_arrow_proxy();
    dst = proxy.extract_schema();
    return std::move(*this);
}

array::size_type array::size() const
{
return array_size(*p_array);
Expand Down
43 changes: 43 additions & 0 deletions src/arrow_array_schema_proxy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,13 @@ namespace sparrow
}
}

// Drops the state the proxy keeps besides the ArrowArray / ArrowSchema
// themselves: empties m_buffers and m_children and resets m_dictionary.
// Called by extract_array() and extract_schema() once the structure has
// been handed out.
void arrow_proxy::reset()
{
m_buffers.clear();
m_children.clear();
m_dictionary.reset();
}

bool arrow_proxy::array_created_with_sparrow() const
{
return array().release == &sparrow::release_arrow_array;
Expand Down Expand Up @@ -549,6 +556,16 @@ namespace sparrow
return var.index() == 0 ? *std::get<0>(var) : std::get<1>(var);
}

// True when m_array currently stores an ArrowArray by value, false when it
// stores a pointer to one.
[[nodiscard]] bool arrow_proxy::owns_array() const
{
    return std::get_if<ArrowArray>(&m_array) != nullptr;
}

// True when m_schema currently stores an ArrowSchema by value, false when
// it stores a pointer to one.
[[nodiscard]] bool arrow_proxy::owns_schema() const
{
    return std::get_if<ArrowSchema>(&m_schema) != nullptr;
}

[[nodiscard]] const ArrowArray& arrow_proxy::array() const
{
return get_value_reference_of_variant<const ArrowArray>(m_array);
Expand All @@ -569,6 +586,32 @@ namespace sparrow
return get_value_reference_of_variant<ArrowSchema>(m_schema);
}

// Hands the by-value ArrowArray over to the caller.
//
// Throws std::runtime_error when m_array only stores a pointer to an
// ArrowArray. Otherwise the stored ArrowArray is moved out, m_array is
// replaced with a value-initialized ArrowArray, and reset() is called
// before the extracted structure is returned.
[[nodiscard]] ArrowArray arrow_proxy::extract_array()
{
    const bool is_pointer = std::holds_alternative<ArrowArray*>(m_array);
    if (is_pointer)
    {
        throw std::runtime_error("cannot extract an ArrowArray not owned by the structure");
    }

    ArrowArray extracted = std::move(std::get<ArrowArray>(m_array));
    m_array = ArrowArray{};
    reset();
    return extracted;
}

// Hands the by-value ArrowSchema over to the caller.
//
// Throws std::runtime_error when m_schema only stores a pointer to an
// ArrowSchema. Otherwise the stored ArrowSchema is moved out, m_schema is
// replaced with a value-initialized ArrowSchema, and reset() is called
// before the extracted structure is returned.
[[nodiscard]] ArrowSchema arrow_proxy::extract_schema()
{
    const bool is_pointer = std::holds_alternative<ArrowSchema*>(m_schema);
    if (is_pointer)
    {
        throw std::runtime_error("cannot extract an ArrowSchema not owned by the structure");
    }

    ArrowSchema extracted = std::move(std::get<ArrowSchema>(m_schema));
    m_schema = ArrowSchema{};
    reset();
    return extracted;
}

void arrow_proxy::update_null_count()
{
const auto buffer_types = get_buffer_types_from_data_type(data_type());
Expand Down
Loading
Loading