Skip to content

Commit

Permalink
apacheGH-34316: [Python] FixedSizeListArray.from_arrays supports mask…
Browse files Browse the repository at this point in the history
… parameter (apache#39396)

### What changes are included in this PR?

Add `mask` / `null_bitmap` parameters to the corresponding Cython / C++ `FixedSizeListArray` methods, and propagate this bitmap to the resulting array instead of using the current dummy `validity_buf`.

### Are these changes tested?

Yes

### Are there any user-facing changes?

Yes, a `mask` parameter has been added to `FixedSizeListArray.from_arrays`.
* Closes: apache#34316

Authored-by: LucasG0 <guillermou.lucas@gmail.com>
Signed-off-by: Will Jones <willjones127@gmail.com>
  • Loading branch information
LucasG0 authored and clayburn committed Jan 23, 2024
1 parent b2961c7 commit 8022cea
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 18 deletions.
16 changes: 8 additions & 8 deletions cpp/src/arrow/array/array_nested.cc
Original file line number Diff line number Diff line change
Expand Up @@ -894,7 +894,8 @@ const std::shared_ptr<DataType>& FixedSizeListArray::value_type() const {
const std::shared_ptr<Array>& FixedSizeListArray::values() const { return values_; }

Result<std::shared_ptr<Array>> FixedSizeListArray::FromArrays(
const std::shared_ptr<Array>& values, int32_t list_size) {
const std::shared_ptr<Array>& values, int32_t list_size,
std::shared_ptr<Buffer> null_bitmap, int64_t null_count) {
if (list_size <= 0) {
return Status::Invalid("list_size needs to be a strict positive integer");
}
Expand All @@ -905,14 +906,14 @@ Result<std::shared_ptr<Array>> FixedSizeListArray::FromArrays(
}
int64_t length = values->length() / list_size;
auto list_type = std::make_shared<FixedSizeListType>(values->type(), list_size);
std::shared_ptr<Buffer> validity_buf;

return std::make_shared<FixedSizeListArray>(list_type, length, values, validity_buf,
/*null_count=*/0, /*offset=*/0);
return std::make_shared<FixedSizeListArray>(list_type, length, values, null_bitmap,
null_count);
}

Result<std::shared_ptr<Array>> FixedSizeListArray::FromArrays(
const std::shared_ptr<Array>& values, std::shared_ptr<DataType> type) {
const std::shared_ptr<Array>& values, std::shared_ptr<DataType> type,
std::shared_ptr<Buffer> null_bitmap, int64_t null_count) {
if (type->id() != Type::FIXED_SIZE_LIST) {
return Status::TypeError("Expected fixed size list type, got ", type->ToString());
}
Expand All @@ -926,10 +927,9 @@ Result<std::shared_ptr<Array>> FixedSizeListArray::FromArrays(
"The length of the values Array needs to be a multiple of the list size");
}
int64_t length = values->length() / list_type.list_size();
std::shared_ptr<Buffer> validity_buf;

return std::make_shared<FixedSizeListArray>(type, length, values, validity_buf,
/*null_count=*/0, /*offset=*/0);
return std::make_shared<FixedSizeListArray>(type, length, values, null_bitmap,
null_count);
}

Result<std::shared_ptr<Array>> FixedSizeListArray::Flatten(
Expand Down
16 changes: 12 additions & 4 deletions cpp/src/arrow/array/array_nested.h
Original file line number Diff line number Diff line change
Expand Up @@ -599,17 +599,25 @@ class ARROW_EXPORT FixedSizeListArray : public Array {
///
/// \param[in] values Array containing list values
/// \param[in] list_size The fixed length of each list
/// \param[in] null_bitmap Optional validity bitmap
/// \param[in] null_count Optional null count in null_bitmap
/// \return Will have length equal to values.length() / list_size
static Result<std::shared_ptr<Array>> FromArrays(const std::shared_ptr<Array>& values,
int32_t list_size);
static Result<std::shared_ptr<Array>> FromArrays(
const std::shared_ptr<Array>& values, int32_t list_size,
std::shared_ptr<Buffer> null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount);

/// \brief Construct FixedSizeListArray from child value array and type
///
/// \param[in] values Array containing list values
/// \param[in] type The fixed sized list type
/// \param[in] null_bitmap Optional validity bitmap
/// \param[in] null_count Optional null count in null_bitmap
/// \return Will have length equal to values.length() / type.list_size()
static Result<std::shared_ptr<Array>> FromArrays(const std::shared_ptr<Array>& values,
std::shared_ptr<DataType> type);
static Result<std::shared_ptr<Array>> FromArrays(
const std::shared_ptr<Array>& values, std::shared_ptr<DataType> type,
std::shared_ptr<Buffer> null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount);

protected:
void SetData(const std::shared_ptr<ArrayData>& data);
Expand Down
13 changes: 9 additions & 4 deletions python/pyarrow/array.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -2484,7 +2484,7 @@ cdef class MapArray(ListArray):
Examples
--------
First, let's understand the structure of our dataset when viewed in a rectangular data model.
First, let's understand the structure of our dataset when viewed in a rectangular data model.
The total of 5 respondents answered the question "How much did you like the movie x?".
The value -1 in the integer array means that the value is missing. The boolean array
represents the null bitmask corresponding to the missing values in the integer array.
Expand Down Expand Up @@ -2590,7 +2590,7 @@ cdef class FixedSizeListArray(BaseListArray):
"""

@staticmethod
def from_arrays(values, list_size=None, DataType type=None):
def from_arrays(values, list_size=None, DataType type=None, mask=None):
"""
Construct FixedSizeListArray from array of values and a list length.
Expand All @@ -2602,6 +2602,9 @@ cdef class FixedSizeListArray(BaseListArray):
type : DataType, optional
If not specified, a default ListType with the values' type and
`list_size` length is used.
mask : Array (boolean type), optional
Indicate which values are null (True) or not null (False).
Returns
-------
Expand Down Expand Up @@ -2652,19 +2655,21 @@ cdef class FixedSizeListArray(BaseListArray):

_values = asarray(values)

c_mask = c_mask_inverted_from_obj(mask, None)

if type is not None:
if list_size is not None:
raise ValueError("Cannot specify both list_size and type")
with nogil:
c_result = CFixedSizeListArray.FromArraysAndType(
_values.sp_array, type.sp_type)
_values.sp_array, type.sp_type, c_mask)
else:
if list_size is None:
raise ValueError("Should specify one of list_size and type")
_list_size = <int32_t>list_size
with nogil:
c_result = CFixedSizeListArray.FromArrays(
_values.sp_array, _list_size)
_values.sp_array, _list_size, c_mask)
cdef Array result = pyarrow_wrap_array(GetResultValue(c_result))
result.validate()
return result
Expand Down
8 changes: 6 additions & 2 deletions python/pyarrow/includes/libarrow.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -673,11 +673,15 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
cdef cppclass CFixedSizeListArray" arrow::FixedSizeListArray"(CArray):
@staticmethod
CResult[shared_ptr[CArray]] FromArrays(
const shared_ptr[CArray]& values, int32_t list_size)
const shared_ptr[CArray]& values,
int32_t list_size,
shared_ptr[CBuffer] null_bitmap)

@staticmethod
CResult[shared_ptr[CArray]] FromArraysAndType" FromArrays"(
const shared_ptr[CArray]& values, shared_ptr[CDataType])
const shared_ptr[CArray]& values,
shared_ptr[CDataType],
shared_ptr[CBuffer] null_bitmap)

int64_t value_offset(int i)
int64_t value_length(int i)
Expand Down
10 changes: 10 additions & 0 deletions python/pyarrow/tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -1091,6 +1091,16 @@ def test_fixed_size_list_from_arrays():
assert result.type.equals(typ)
assert result.type.value_field.name == "name"

result = pa.FixedSizeListArray.from_arrays(values,
type=typ,
mask=pa.array([False, True, False]))
assert result.to_pylist() == [[0, 1, 2, 3], None, [8, 9, 10, 11]]

result = pa.FixedSizeListArray.from_arrays(values,
list_size=4,
mask=pa.array([False, True, False]))
assert result.to_pylist() == [[0, 1, 2, 3], None, [8, 9, 10, 11]]

# raise on invalid values / list_size
with pytest.raises(ValueError):
pa.FixedSizeListArray.from_arrays(values, -4)
Expand Down

0 comments on commit 8022cea

Please sign in to comment.