Skip to content

Commit

Permalink
Implemented arrow data getter / extraction (#241)
Browse files Browse the repository at this point in the history
Implemented arrow data getter / extraction
  • Loading branch information
JohanMabille authored Oct 17, 2024
1 parent bd1d621 commit ced2bed
Show file tree
Hide file tree
Showing 11 changed files with 285 additions and 5 deletions.
8 changes: 8 additions & 0 deletions include/sparrow/array.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,14 @@ namespace sparrow
SPARROW_API array(ArrowArray&& array, ArrowSchema* schema);
SPARROW_API array(ArrowArray* array, ArrowSchema* schema);

/**
 * Tells whether the arrow_proxy wrapped by this array owns its ArrowArray
 * (forwards to arrow_proxy::owns_array()).
 */
SPARROW_API bool owns_arrow_array() const;
/**
 * Stores in the given pointer the address of the ArrowArray held by the
 * wrapped arrow_proxy. Nothing is extracted from the proxy.
 * @return A reference to this array.
 */
SPARROW_API array& get_arrow_array(ArrowArray*&);
/**
 * Assigns to the given ArrowArray the result of arrow_proxy::extract_array().
 * Only callable on an rvalue array.
 * @throws std::runtime_error if the proxy does not own its ArrowArray.
 * @return This array, as an rvalue reference.
 */
SPARROW_API array&& extract_arrow_array(ArrowArray&) &&;

/**
 * Tells whether the arrow_proxy wrapped by this array owns its ArrowSchema
 * (forwards to arrow_proxy::owns_schema()).
 */
SPARROW_API bool owns_arrow_schema() const;
/**
 * Stores in the given pointer the address of the ArrowSchema held by the
 * wrapped arrow_proxy. Nothing is extracted from the proxy.
 * @return A reference to this array.
 */
SPARROW_API array& get_arrow_schema(ArrowSchema*&);
/**
 * Assigns to the given ArrowSchema the result of arrow_proxy::extract_schema().
 * Only callable on an rvalue array.
 * @throws std::runtime_error if the proxy does not own its ArrowSchema.
 * @return This array, as an rvalue reference.
 */
SPARROW_API array&& extract_arrow_schema(ArrowSchema&) &&;

SPARROW_API size_type size() const;
SPARROW_API const_reference operator[](size_type) const;

Expand Down
5 changes: 5 additions & 0 deletions include/sparrow/arrow_array_schema_proxy.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -225,9 +225,13 @@ namespace sparrow
*/
[[nodiscard]] SPARROW_API arrow_proxy view();

/**
 * @return true if the proxy stores the ArrowArray by value, false if it
 * only stores a pointer to it.
 */
[[nodiscard]] SPARROW_API bool owns_array() const;
/**
 * Hands the owned ArrowArray over to the caller. The proxy then holds an
 * `ArrowArray{}` and its buffers, children and dictionary are cleared.
 * @throws std::runtime_error if the proxy only stores a pointer to the ArrowArray.
 */
[[nodiscard]] SPARROW_API ArrowArray extract_array();
/** @return A reference to the ArrowArray stored in, or pointed to by, the proxy. */
[[nodiscard]] SPARROW_API ArrowArray& array();
/** @return A constant reference to the ArrowArray stored in, or pointed to by, the proxy. */
[[nodiscard]] SPARROW_API const ArrowArray& array() const;

/**
 * @return true if the proxy stores the ArrowSchema by value, false if it
 * only stores a pointer to it.
 */
[[nodiscard]] SPARROW_API bool owns_schema() const;
/**
 * Hands the owned ArrowSchema over to the caller. The proxy then holds an
 * `ArrowSchema{}` and its buffers, children and dictionary are cleared.
 * @throws std::runtime_error if the proxy only stores a pointer to the ArrowSchema.
 */
[[nodiscard]] SPARROW_API ArrowSchema extract_schema();
/** @return A reference to the ArrowSchema stored in, or pointed to by, the proxy. */
[[nodiscard]] SPARROW_API ArrowSchema& schema();
/** @return A constant reference to the ArrowSchema stored in, or pointed to by, the proxy. */
[[nodiscard]] SPARROW_API const ArrowSchema& schema() const;

Expand All @@ -254,6 +258,7 @@ namespace sparrow
void update_children();
void update_dictionary();
void update_null_count();
void reset();

[[nodiscard]] bool array_created_with_sparrow() const;
[[nodiscard]] bool schema_created_with_sparrow() const;
Expand Down
9 changes: 8 additions & 1 deletion include/sparrow/layout/array_base.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,11 +123,11 @@ namespace sparrow
const_bitmap_iterator bitmap_begin() const;
const_bitmap_iterator bitmap_end() const;


private:

static constexpr std::size_t m_bitmap_buffer_index = 0;

arrow_proxy& get_arrow_proxy();
bitmap_type make_bitmap();

arrow_proxy m_proxy;
Expand All @@ -136,6 +136,8 @@ namespace sparrow
// friend classes
friend class layout_iterator<self_type, false>;
friend class layout_iterator<self_type, true>;
template <class T>
friend class array_wrapper_impl;
};

template <class D>
Expand Down Expand Up @@ -291,6 +293,11 @@ namespace sparrow
return sparrow::next(bitmap_begin(), size());
}

template <class D>
auto array_crtp_base<D>::get_arrow_proxy() -> arrow_proxy&
{
return m_proxy;
}

template <class D>
auto array_crtp_base<D>::make_bitmap() -> bitmap_type
Expand Down
17 changes: 16 additions & 1 deletion include/sparrow/layout/array_wrapper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <memory>
#include <variant>

#include "sparrow/arrow_array_schema_proxy.hpp"
#include "sparrow/types/data_traits.hpp"
#include "sparrow/utils/memory.hpp"

Expand Down Expand Up @@ -68,6 +69,7 @@ namespace sparrow

enum data_type data_type() const;
bool is_dictionary() const;
arrow_proxy& get_arrow_proxy();

protected:

Expand All @@ -78,6 +80,7 @@ namespace sparrow

enum data_type m_data_type;
virtual bool is_dictionary_impl() const = 0;
virtual arrow_proxy& get_arrow_proxy_impl() = 0;
virtual wrapper_ptr clone_impl() const = 0;
};

Expand All @@ -103,6 +106,7 @@ namespace sparrow

array_wrapper_impl(const array_wrapper_impl&);
bool is_dictionary_impl() const override;
arrow_proxy& get_arrow_proxy_impl() override;
wrapper_ptr clone_impl() const override;

using storage_type = std::variant<value_ptr<T>, std::shared_ptr<T>, T*>;
Expand Down Expand Up @@ -135,6 +139,11 @@ namespace sparrow
return is_dictionary_impl();
}

inline arrow_proxy& array_wrapper::get_arrow_proxy()
{
return get_arrow_proxy_impl();
}

inline array_wrapper::array_wrapper(enum data_type dt)
: m_data_type(dt)
{
Expand Down Expand Up @@ -206,7 +215,13 @@ namespace sparrow
{
return detail::is_dictionary_encoded_array<T>::get();
}


template <class T>
arrow_proxy& array_wrapper_impl<T>::get_arrow_proxy_impl()
{
return p_array->get_arrow_proxy();
}

template <class T>
auto array_wrapper_impl<T>::clone_impl() const -> wrapper_ptr
{
Expand Down
11 changes: 11 additions & 0 deletions include/sparrow/layout/dictionary_encoded_array.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,9 +129,14 @@ namespace sparrow
static keys_layout create_keys_layout(arrow_proxy& proxy);
static values_layout create_values_layout(arrow_proxy& proxy);

arrow_proxy& get_arrow_proxy();

arrow_proxy m_proxy;
keys_layout m_keys_layout;
values_layout p_values_layout;

template <class T>
friend class array_wrapper_impl;
};

/*******************************************
Expand Down Expand Up @@ -246,4 +251,10 @@ namespace sparrow
{
return keys_layout{arrow_proxy{&proxy.array(), &proxy.schema()}};
}

template <std::integral IT>
auto dictionary_encoded_array<IT>::get_arrow_proxy() -> arrow_proxy&
{
return m_proxy;
}
}
10 changes: 10 additions & 0 deletions include/sparrow/layout/null_array.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,12 @@ namespace sparrow

difference_type ssize() const;

arrow_proxy& get_arrow_proxy();

arrow_proxy m_proxy;

template <class T>
friend class array_wrapper_impl;
};

bool operator==(const null_array& lhs, const null_array& rhs);
Expand Down Expand Up @@ -230,6 +235,11 @@ namespace sparrow
return static_cast<difference_type>(size());
}

inline arrow_proxy& null_array::get_arrow_proxy()
{
return m_proxy;
}

inline bool operator==(const null_array& lhs, const null_array& rhs)
{
return lhs.size() == rhs.size();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ namespace sparrow
SPARROW_API static acc_length_ptr_variant_type get_acc_lengths_ptr(const array_wrapper& ar);
SPARROW_API std::uint64_t get_run_length(std::uint64_t run_index) const;

arrow_proxy& get_arrow_proxy();

arrow_proxy m_proxy;
std::uint64_t m_encoded_length;

Expand All @@ -85,6 +87,8 @@ namespace sparrow
// friend classes
friend class run_encoded_array_iterator<false>;
friend class run_encoded_array_iterator<true>;
template <class T>
friend class array_wrapper_impl;
};

inline run_end_encoded_array::run_end_encoded_array(arrow_proxy proxy)
Expand Down Expand Up @@ -120,6 +124,11 @@ namespace sparrow
return ret;
}

inline arrow_proxy& run_end_encoded_array::get_arrow_proxy()
{
return m_proxy;
}

inline auto run_end_encoded_array::operator[](std::uint64_t i) -> array_traits::const_reference
{
return static_cast<const run_end_encoded_array*>(this)->operator[](i);
Expand Down
14 changes: 12 additions & 2 deletions include/sparrow/layout/union_array.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,13 +83,17 @@ namespace sparrow
using type_id_map = std::array<std::uint8_t, 256>;
static type_id_map parse_type_id_map(std::string_view format_string);

arrow_proxy& get_arrow_proxy();

arrow_proxy m_proxy;
const std::uint8_t * p_type_ids;
std::vector<cloning_ptr<array_wrapper>> m_children;

// map from type-id to child-index
std::array<std::uint8_t, 256> m_type_id_map;


template <class T>
friend class array_wrapper_impl;
};

class dense_union_array : public union_array_crtp_base<dense_union_array>
Expand Down Expand Up @@ -128,6 +132,12 @@ namespace sparrow
return ret;
}

template <class DERIVED>
arrow_proxy& union_array_crtp_base<DERIVED>::get_arrow_proxy()
{
return m_proxy;
}

template <class DERIVED>
union_array_crtp_base<DERIVED>::union_array_crtp_base(arrow_proxy proxy)
: m_proxy(std::move(proxy)),
Expand Down Expand Up @@ -221,4 +231,4 @@ namespace sparrow
{
return i + m_proxy.offset();
}
}
}
36 changes: 35 additions & 1 deletion src/array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,41 @@ namespace sparrow
: p_array(array_factory(arrow_proxy(array, schema)))
{
}


// Reports whether the proxy of the wrapped layout owns its ArrowArray.
bool array::owns_arrow_array() const
{
    const auto& proxy = p_array->get_arrow_proxy();
    return proxy.owns_array();
}

// Makes dst point to the ArrowArray held by the proxy of the wrapped
// layout; the proxy itself is left untouched. Returns *this.
array& array::get_arrow_array(ArrowArray*& dst)
{
    auto& proxy = p_array->get_arrow_proxy();
    ArrowArray& arrow_array = proxy.array();
    dst = &arrow_array;
    return *this;
}

// Assigns to dst the ArrowArray extracted from the proxy of the wrapped
// layout, then returns this array as an rvalue.
// arrow_proxy::extract_array() throws std::runtime_error when the
// ArrowArray is not owned by the proxy.
array&& array::extract_arrow_array(ArrowArray& dst) &&
{
    auto& proxy = p_array->get_arrow_proxy();
    dst = proxy.extract_array();
    return std::move(*this);
}

// Reports whether the proxy of the wrapped layout owns its ArrowSchema.
bool array::owns_arrow_schema() const
{
    const auto& proxy = p_array->get_arrow_proxy();
    return proxy.owns_schema();
}

// Makes dst point to the ArrowSchema held by the proxy of the wrapped
// layout; the proxy itself is left untouched. Returns *this.
array& array::get_arrow_schema(ArrowSchema*& dst)
{
    auto& proxy = p_array->get_arrow_proxy();
    ArrowSchema& arrow_schema = proxy.schema();
    dst = &arrow_schema;
    return *this;
}

// Assigns to dst the ArrowSchema extracted from the proxy of the wrapped
// layout, then returns this array as an rvalue.
// arrow_proxy::extract_schema() throws std::runtime_error when the
// ArrowSchema is not owned by the proxy.
array&& array::extract_arrow_schema(ArrowSchema& dst) &&
{
    auto& proxy = p_array->get_arrow_proxy();
    dst = proxy.extract_schema();
    return std::move(*this);
}

array::size_type array::size() const
{
return array_size(*p_array);
Expand Down
43 changes: 43 additions & 0 deletions src/arrow_array_schema_proxy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,13 @@ namespace sparrow
}
}

// Drops the state cached by the proxy: empties m_buffers and m_children
// and resets m_dictionary. Called by extract_array() and extract_schema()
// once the extracted structure has been replaced by an empty one.
void arrow_proxy::reset()
{
m_buffers.clear();
m_children.clear();
m_dictionary.reset();
}

bool arrow_proxy::array_created_with_sparrow() const
{
return array().release == &sparrow::release_arrow_array;
Expand Down Expand Up @@ -549,6 +556,16 @@ namespace sparrow
return var.index() == 0 ? *std::get<0>(var) : std::get<1>(var);
}

[[nodiscard]] bool arrow_proxy::owns_array() const
{
return std::holds_alternative<ArrowArray>(m_array);
}

[[nodiscard]] bool arrow_proxy::owns_schema() const
{
return std::holds_alternative<ArrowSchema>(m_schema);
}

[[nodiscard]] const ArrowArray& arrow_proxy::array() const
{
return get_value_reference_of_variant<const ArrowArray>(m_array);
Expand All @@ -569,6 +586,32 @@ namespace sparrow
return get_value_reference_of_variant<ArrowSchema>(m_schema);
}

// Transfers the owned ArrowArray to the caller.
//
// Throws std::runtime_error when m_array only holds a pointer to an
// ArrowArray. Otherwise the stored ArrowArray is taken out, m_array is
// replaced with `ArrowArray{}` and reset() clears the cached state.
[[nodiscard]] ArrowArray arrow_proxy::extract_array()
{
    const bool is_borrowed = std::holds_alternative<ArrowArray*>(m_array);
    if (is_borrowed)
    {
        throw std::runtime_error("cannot extract an ArrowArray not owned by the structure");
    }

    ArrowArray extracted = std::get<ArrowArray>(std::move(m_array));

    // Leave an empty structure behind before dropping the cached state.
    m_array = ArrowArray{};
    reset();

    return extracted;
}

// Transfers the owned ArrowSchema to the caller.
//
// Throws std::runtime_error when m_schema only holds a pointer to an
// ArrowSchema. Otherwise the stored ArrowSchema is taken out, m_schema is
// replaced with `ArrowSchema{}` and reset() clears the cached state.
[[nodiscard]] ArrowSchema arrow_proxy::extract_schema()
{
    const bool is_borrowed = std::holds_alternative<ArrowSchema*>(m_schema);
    if (is_borrowed)
    {
        throw std::runtime_error("cannot extract an ArrowSchema not owned by the structure");
    }

    ArrowSchema extracted = std::get<ArrowSchema>(std::move(m_schema));

    // Leave an empty structure behind before dropping the cached state.
    m_schema = ArrowSchema{};
    reset();

    return extracted;
}

void arrow_proxy::update_null_count()
{
const auto buffer_types = get_buffer_types_from_data_type(data_type());
Expand Down
Loading

0 comments on commit ced2bed

Please sign in to comment.