Skip to content

Commit

Permalink
Green writes
Browse files Browse the repository at this point in the history
  • Loading branch information
WillAyd committed Sep 6, 2024
1 parent 9d7e806 commit b5ba93e
Show file tree
Hide file tree
Showing 4 changed files with 84 additions and 27 deletions.
41 changes: 34 additions & 7 deletions src/nanoarrow/common/array.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,13 @@ static void ArrowArrayReleaseInternal(struct ArrowArray* array) {
ArrowBitmapReset(&private_data->bitmap);
ArrowBufferReset(&private_data->buffers[0]);
ArrowBufferReset(&private_data->buffers[1]);
// TODO: missing reset on third fixed buffer?
ArrowFree(private_data->buffer_data);
for (int64_t i = 0; i < private_data->n_variadic_buffers; ++i) {
ArrowBufferReset(&private_data->variadic_buffers[i]);
}
ArrowFree(private_data->variadic_buffers);
ArrowFree(private_data->variadic_buffer_sizes);
ArrowFree(private_data);
}

Expand Down Expand Up @@ -74,8 +81,6 @@ static ArrowErrorCode ArrowArraySetStorageType(struct ArrowArray* array,
case NANOARROW_TYPE_UNINITIALIZED:
case NANOARROW_TYPE_NA:
case NANOARROW_TYPE_RUN_END_ENCODED:
case NANOARROW_TYPE_BINARY_VIEW:
case NANOARROW_TYPE_STRING_VIEW:
array->n_buffers = 0;
break;

Expand Down Expand Up @@ -107,6 +112,8 @@ static ArrowErrorCode ArrowArraySetStorageType(struct ArrowArray* array,
case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO:
case NANOARROW_TYPE_FIXED_SIZE_BINARY:
case NANOARROW_TYPE_DENSE_UNION:
case NANOARROW_TYPE_BINARY_VIEW:
case NANOARROW_TYPE_STRING_VIEW:
array->n_buffers = 2;
break;
case NANOARROW_TYPE_STRING:
Expand Down Expand Up @@ -151,12 +158,16 @@ ArrowErrorCode ArrowArrayInitFromType(struct ArrowArray* array,
ArrowBitmapInit(&private_data->bitmap);
ArrowBufferInit(&private_data->buffers[0]);
ArrowBufferInit(&private_data->buffers[1]);
private_data->buffer_data[0] = NULL;
private_data->buffer_data[1] = NULL;
private_data->buffer_data[2] = NULL;
private_data->buffer_data = ArrowMalloc(sizeof(void*) * NANOARROW_MAX_FIXED_BUFFERS);
for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; ++i) {
private_data->buffer_data[i] = NULL;
}
private_data->n_variadic_buffers = 0;
private_data->variadic_buffers = NULL;
private_data->variadic_buffer_sizes = NULL;

array->private_data = private_data;
array->buffers = (const void**)(&private_data->buffer_data);
array->buffers = (const void**)(private_data->buffer_data);

// These are not technically "storage" in the sense that they do not appear
// in the ArrowSchemaView's storage_type member; however, allowing them here
Expand Down Expand Up @@ -452,14 +463,30 @@ static ArrowErrorCode ArrowArrayFinalizeBuffers(struct ArrowArray* array) {
NANOARROW_RETURN_NOT_OK(ArrowArrayFinalizeBuffers(array->dictionary));
}

const int64_t n_vbuf = private_data->n_variadic_buffers;
if (n_vbuf > 0) {
// TODO: the hard-coded 2's are tied to binary/string view implementations
// but maybe we should make them generic
private_data->buffer_data =
realloc(private_data->buffer_data, sizeof(void*) * (2 + n_vbuf + 1));
for (int64_t i = 0; i < n_vbuf; ++i) {
private_data->buffer_data[2 + i] = private_data->variadic_buffers[i].data;
}
private_data->buffer_data[n_vbuf + 2] = private_data->variadic_buffer_sizes;
array->buffers = (const void**)(private_data->buffer_data);
}

return NANOARROW_OK;
}

static void ArrowArrayFlushInternalPointers(struct ArrowArray* array) {
struct ArrowArrayPrivateData* private_data =
(struct ArrowArrayPrivateData*)array->private_data;

for (int64_t i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) {
// TODO: this does not seem correct
const int64_t limit =
private_data->n_variadic_buffers == 0 ? NANOARROW_MAX_FIXED_BUFFERS : 2;
for (int64_t i = 0; i < limit; i++) {
private_data->buffer_data[i] = ArrowArrayBuffer(array, i)->data;
}

Expand Down
19 changes: 15 additions & 4 deletions src/nanoarrow/common/array_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -904,7 +904,7 @@ TEST(ArrayTest, ArrayTestAppendToStringViewArray) {
// Check that we can reserve
ASSERT_EQ(ArrowArrayReserve(&array, 5), NANOARROW_OK);
EXPECT_EQ(ArrowArrayBuffer(&array, 1)->capacity_bytes,
(5 + 1) * sizeof(union ArrowBinaryViewType));
5 * sizeof(union ArrowBinaryViewType));

EXPECT_EQ(ArrowArrayAppendString(&array, "1234"_asv), NANOARROW_OK);
EXPECT_EQ(ArrowArrayAppendNull(&array, 2), NANOARROW_OK);
Expand All @@ -916,10 +916,21 @@ TEST(ArrayTest, ArrayTestAppendToStringViewArray) {
EXPECT_EQ(array.length, 5);
EXPECT_EQ(array.null_count, 2);
auto validity_buffer = reinterpret_cast<const uint8_t*>(array.buffers[0]);
auto data_buffer = reinterpret_cast<const int64_t*>(array.buffers[1]);
auto out_of_line_buffer = reinterpret_cast<const char*>(array.buffers[2]);
auto inline_buffer =
reinterpret_cast<const union ArrowBinaryViewType*>(array.buffers[1]);
auto variable_buffer = reinterpret_cast<const char*>(array.buffers[2]);
auto sizes_buffer = reinterpret_cast<const int64_t*>(array.buffers[3]);

EXPECT_EQ(validity_buffer[0], 0b00011001);
EXPECT_EQ(memcmp(data_buffer, "1234", 4), 0);
EXPECT_EQ(memcmp(inline_buffer[0].inlined.data, "1234", 4), 0);
EXPECT_EQ(inline_buffer[0].inlined.size, 4);
EXPECT_EQ(memcmp(inline_buffer[3].ref.data, "long", 4), 0);
EXPECT_EQ(inline_buffer[3].ref.size, 27);
EXPECT_EQ(inline_buffer[3].ref.buffer_index, 0);
EXPECT_EQ(inline_buffer[3].ref.offset, 0);

EXPECT_EQ(memcmp(variable_buffer, "longer_than_the_inline_size", 27), 0);
EXPECT_EQ(sizes_buffer[0], 27);

// TODO: need to add overload for ViewArrayAsBytes
/*
Expand Down
32 changes: 23 additions & 9 deletions src/nanoarrow/common/inline_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <float.h>
#include <limits.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include "nanoarrow/common/inline_buffer.h"
Expand Down Expand Up @@ -506,15 +507,28 @@ static inline ArrowErrorCode ArrowArrayAppendBytes(struct ArrowArray* array,
if (value.size_bytes <= NANOARROW_BINARY_VIEW_INLINE_SIZE) {
memcpy(bvt.inlined.data, value.data.as_char, value.size_bytes);
} else {
struct ArrowBuffer* current_out_buffer = ArrowArrayBuffer(array, 2);
// TODO: with C11 would be great to static_assert that sizeof(bvt.ref.data) <=
// NANOARROW_BINARY_VIEW_INLINE_SIZE
// assert(sizeof(bvt.ref.data) <= NANOARROW_BINARY_VIEW_INLINE_SIZE);
memcpy(bvt.ref.data, value.data.as_char, sizeof(bvt.ref.data));
// TODO: do not hardcode buffer position
bvt.ref.buffer_index = 0;
bvt.ref.offset = current_out_buffer->size_bytes;
NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(current_out_buffer, &bvt, sizeof(bvt)));
int64_t n_vbufs = private_data->n_variadic_buffers;
if (n_vbufs == 0 ||
private_data->variadic_buffers[n_vbufs - 1].size_bytes + value.size_bytes >
NANOARROW_BINARY_VIEW_BLOCK_SIZE) {
// TODO: ArrowMalloc?
++n_vbufs;
// TODO: these should be realloc for > 0 case
private_data->variadic_buffers =
(struct ArrowBuffer*)malloc(sizeof(struct ArrowBuffer) * (n_vbufs));
private_data->variadic_buffer_sizes =
(int64_t*)malloc(sizeof(int64_t) * (n_vbufs));
ArrowBufferInit(&private_data->variadic_buffers[n_vbufs - 1]);
private_data->n_variadic_buffers = n_vbufs;
}

struct ArrowBuffer* variadic_buf = &private_data->variadic_buffers[n_vbufs - 1];
memcpy(bvt.ref.data, value.data.as_char, NANOARROW_BINARY_VIEW_PREVIEW_SIZE);
bvt.ref.buffer_index = n_vbufs - 1;
bvt.ref.offset = variadic_buf->size_bytes;
NANOARROW_RETURN_NOT_OK(
ArrowBufferAppend(variadic_buf, value.data.as_char, value.size_bytes));
private_data->variadic_buffer_sizes[n_vbufs - 1] = variadic_buf->size_bytes;
}
NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_buffer, &bvt, sizeof(bvt)));
} else {
Expand Down
19 changes: 12 additions & 7 deletions src/nanoarrow/common/inline_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -626,12 +626,8 @@ enum ArrowBufferType {
NANOARROW_BUFFER_TYPE_DATA_VIEW
};

/// \brief The maximum number of buffers in an ArrowArrayView or ArrowLayout
/// \brief The maximum number of fixed buffers in an ArrowArrayView or ArrowLayout
/// \ingroup nanoarrow-array-view
///
/// All currently supported types have 3 buffers or fewer; however, future types
/// may involve a variable number of buffers (e.g., string view). These buffers
/// will be represented by separate members of the ArrowArrayView or ArrowLayout.
#define NANOARROW_MAX_FIXED_BUFFERS 3

/// \brief An non-owning view of a string
Expand Down Expand Up @@ -838,8 +834,8 @@ struct ArrowArrayPrivateData {

// The array of pointers to buffers. This must be updated after a sequence
// of appends to synchronize its values with the actual buffer addresses
// (which may have ben reallocated uring that time)
const void* buffer_data[NANOARROW_MAX_FIXED_BUFFERS];
// (which may have ben reallocated during that time)
const void** buffer_data;

// The storage data type, or NANOARROW_TYPE_UNINITIALIZED if unknown
enum ArrowType storage_type;
Expand All @@ -851,6 +847,15 @@ struct ArrowArrayPrivateData {
// In the future this could be replaced with a type id<->child mapping
// to support constructing unions in append mode where type_id != child_index
int8_t union_type_id_is_child_index;

// Number of variadic buffers for binary view types
int64_t n_variadic_buffers;

// Variadic buffers for binary view types
struct ArrowBuffer* variadic_buffers;

// Size of each variadic buffer in bytes
int64_t* variadic_buffer_sizes;
};

/// \brief A representation of an interval.
Expand Down

0 comments on commit b5ba93e

Please sign in to comment.