Skip to content

Commit

Permalink
Added data_type and array_data structures (#18)
Browse files Browse the repository at this point in the history
  • Loading branch information
JohanMabille authored Mar 12, 2024
1 parent e13b837 commit 709f48a
Show file tree
Hide file tree
Showing 4 changed files with 113 additions and 0 deletions.
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,9 @@ OPTION(BUILD_TESTS "sparrow test suite" OFF)
# =====

set(SPARROW_HEADERS
${SPARROW_INCLUDE_DIR}/sparrow/array_data.hpp
${SPARROW_INCLUDE_DIR}/sparrow/buffer.hpp
${SPARROW_INCLUDE_DIR}/sparrow/data_type.hpp
${SPARROW_INCLUDE_DIR}/sparrow/dynamic_bitset.hpp
${SPARROW_INCLUDE_DIR}/sparrow/iterator.hpp
${SPARROW_INCLUDE_DIR}/sparrow/sparrow_version.hpp
Expand Down
43 changes: 43 additions & 0 deletions include/sparrow/array_data.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// Copyright 2024 Man Group Operations Limited
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <cstdint>
#include <vector>

#include "sparrow/buffer.hpp"
#include "sparrow/data_type.hpp"
#include "sparrow/dynamic_bitset.hpp"

namespace sparrow
{
// Plain aggregate bundling a type descriptor, length/offset counters,
// a bitmap, a list of byte buffers and nested array_data values.
// NOTE(review): presumably mirrors the Arrow columnar array layout - confirm.
struct array_data
{
// Storage unit shared by the bitmap and the buffers: one byte.
using block_type = std::uint8_t;
using bitmap_type = dynamic_bitset<block_type>;
using buffer_type = buffer<block_type>;

// Descriptor of the stored data type.
data_descriptor type;
// Both counters default to 0.
// NOTE(review): presumably expressed in elements rather than bytes - confirm.
std::int64_t length = 0;
std::int64_t offset = 0;
// bitmap buffer and null_count
// NOTE(review): presumably the validity bitmap, with the null count
// derived from it - confirm against dynamic_bitset.
bitmap_type bitmap;
// Other buffers
std::vector<buffer_type> buffers;
// Nested array_data values (the structure is recursive).
// NOTE(review): presumably the children of nested types - confirm.
std::vector<array_data> child_data;
};

// Empty tag type; it carries no state.
// NOTE(review): presumably used to denote a missing (null) value - confirm
// against callers.
struct null_type
{
};

// Single shared instance of the tag.
// `inline` gives the constant one definition for the whole program instead
// of one internal-linkage copy per translation unit that includes this
// header. The `{}` initializer value-initializes the object, which avoids
// the "default initialization of a const object" error that some compilers
// raise for a const object declared without an initializer.
inline constexpr null_type null{};
}
67 changes: 67 additions & 0 deletions include/sparrow/data_type.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// Copyright 2024 Man Group Operations Limited
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

namespace sparrow
{
    // Identifiers of the data types currently handled.
    // Enumerator values are spelled out; they are the same values the
    // implicit numbering starting at NA = 0 would produce.
    // TODO: does not support all types specified by the Arrow specification
    // yet
    enum class data_type
    {
        NA = 0,
        BOOL = 1,
        UINT8 = 2,
        INT8 = 3,
        UINT16 = 4,
        INT16 = 5,
        UINT32 = 6,
        INT32 = 7,
        UINT64 = 8,
        INT64 = 9,
        HALF_FLOAT = 10,
        FLOAT = 11,
        DOUBLE = 12,
        // Variable-length string encoded as UTF8
        STRING = 13,
        // Variable-length byte sequence (no guarantee of UTF8-ness)
        BINARY = 14,
        // Binary values that all occupy the same number of bytes
        FIXED_SIZE_BINARY = 15
    };

    // Thin wrapper around a data_type identifier, for now.
    // More data and functions to come.
    class data_descriptor
    {
    public:

        // Wraps the given type identifier. The constructor is explicit, so
        // a bare data_type never converts to a descriptor implicitly.
        constexpr explicit data_descriptor(data_type id)
            : m_id{id}
        {
        }

        // A default-constructed descriptor identifies data_type::UINT8.
        constexpr data_descriptor()
            : m_id{data_type::UINT8}
        {
        }

        // Returns the wrapped type identifier.
        constexpr data_type id() const
        {
            return m_id;
        }

    private:

        data_type m_id;
    };
}

1 change: 1 addition & 0 deletions include/sparrow/dynamic_bitset.hpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
// Copyright 2024 Man Group Operations Limited
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down

0 comments on commit 709f48a

Please sign in to comment.