Skip to content

Commit

Permalink
Replace ArrayData::new() with ArrayData::try_new() and `unsafe ArrayData::new_unchecked` (#822)
Browse files Browse the repository at this point in the history

* Replace `ArrayData::new()` with `ArrayData::try_new()` and `unsafe ArrayData::new_unchecked`

* Fix compile for simd

* remove unsafe in benches
  • Loading branch information
alamb authored Oct 13, 2021
1 parent e898de5 commit 058da05
Show file tree
Hide file tree
Showing 42 changed files with 1,326 additions and 893 deletions.
12 changes: 2 additions & 10 deletions arrow/benches/array_from_vec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,19 +22,11 @@ use criterion::Criterion;
extern crate arrow;

use arrow::array::*;
use arrow::buffer::Buffer;
use arrow::datatypes::*;
use std::{convert::TryFrom, sync::Arc};

fn array_from_vec(n: usize) {
let mut v: Vec<u8> = Vec::with_capacity(n);
for i in 0..n {
v.push((i & 0xffff) as u8);
}
let arr_data = ArrayDataBuilder::new(DataType::Int32)
.add_buffer(Buffer::from(v))
.build();
criterion::black_box(Int32Array::from(arr_data));
let v: Vec<i32> = (0..n as i32).collect();
criterion::black_box(Int32Array::from(v));
}

fn array_string_from_vec(n: usize) {
Expand Down
9 changes: 6 additions & 3 deletions arrow/examples/builders.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,8 @@ fn main() {
.add_buffer(Buffer::from(offsets.to_byte_slice()))
.add_buffer(Buffer::from(&values[..]))
.null_bit_buffer(Buffer::from([0b00000101]))
.build();
.build()
.unwrap();
let binary_array = StringArray::from(array_data);
println!("{:?}", binary_array);

Expand All @@ -92,7 +93,8 @@ fn main() {
let value_data = ArrayData::builder(DataType::Int32)
.len(8)
.add_buffer(Buffer::from(&[0, 1, 2, 3, 4, 5, 6, 7].to_byte_slice()))
.build();
.build()
.unwrap();

// Construct a buffer for value offsets, for the nested array:
// [[0, 1, 2], [3, 4, 5], [6, 7]]
Expand All @@ -105,7 +107,8 @@ fn main() {
.len(3)
.add_buffer(value_offsets)
.add_child_data(value_data)
.build();
.build()
.unwrap();
let list_array = ListArray::from(list_data);

println!("{:?}", list_array);
Expand Down
167 changes: 92 additions & 75 deletions arrow/src/array/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -377,15 +377,17 @@ pub fn new_null_array(data_type: &DataType, length: usize) -> ArrayRef {
DataType::Null => Arc::new(NullArray::new(length)),
DataType::Boolean => {
let null_buf: Buffer = MutableBuffer::new_null(length).into();
make_array(ArrayData::new(
data_type.clone(),
length,
Some(length),
Some(null_buf.clone()),
0,
vec![null_buf],
vec![],
))
make_array(unsafe {
ArrayData::new_unchecked(
data_type.clone(),
length,
Some(length),
Some(null_buf.clone()),
0,
vec![null_buf],
vec![],
)
})
}
DataType::Int8 => new_null_sized_array::<Int8Type>(data_type, length),
DataType::UInt8 => new_null_sized_array::<UInt8Type>(data_type, length),
Expand Down Expand Up @@ -414,15 +416,17 @@ pub fn new_null_array(data_type: &DataType, length: usize) -> ArrayRef {
new_null_sized_array::<IntervalDayTimeType>(data_type, length)
}
},
DataType::FixedSizeBinary(value_len) => make_array(ArrayData::new(
data_type.clone(),
length,
Some(length),
Some(MutableBuffer::new_null(length).into()),
0,
vec![Buffer::from(vec![0u8; *value_len as usize * length])],
vec![],
)),
DataType::FixedSizeBinary(value_len) => make_array(unsafe {
ArrayData::new_unchecked(
data_type.clone(),
length,
Some(length),
Some(MutableBuffer::new_null(length).into()),
0,
vec![Buffer::from(vec![0u8; *value_len as usize * length])],
vec![],
)
}),
DataType::Binary | DataType::Utf8 => {
new_null_binary_array::<i32>(data_type, length)
}
Expand All @@ -435,19 +439,21 @@ pub fn new_null_array(data_type: &DataType, length: usize) -> ArrayRef {
DataType::LargeList(field) => {
new_null_list_array::<i64>(data_type, field.data_type(), length)
}
DataType::FixedSizeList(field, value_len) => make_array(ArrayData::new(
data_type.clone(),
length,
Some(length),
Some(MutableBuffer::new_null(length).into()),
0,
vec![],
vec![
new_null_array(field.data_type(), *value_len as usize * length)
.data()
.clone(),
],
)),
DataType::FixedSizeList(field, value_len) => make_array(unsafe {
ArrayData::new_unchecked(
data_type.clone(),
length,
Some(length),
Some(MutableBuffer::new_null(length).into()),
0,
vec![],
vec![
new_null_array(field.data_type(), *value_len as usize * length)
.data()
.clone(),
],
)
}),
DataType::Struct(fields) => {
let fields: Vec<_> = fields
.iter()
Expand All @@ -467,15 +473,17 @@ pub fn new_null_array(data_type: &DataType, length: usize) -> ArrayRef {
let keys = new_null_array(key, length);
let keys = keys.data();

make_array(ArrayData::new(
data_type.clone(),
length,
Some(length),
keys.null_buffer().cloned(),
0,
keys.buffers().into(),
vec![new_empty_array(value.as_ref()).data().clone()],
))
make_array(unsafe {
ArrayData::new_unchecked(
data_type.clone(),
length,
Some(length),
keys.null_buffer().cloned(),
0,
keys.buffers().into(),
vec![new_empty_array(value.as_ref()).data().clone()],
)
})
}
DataType::Decimal(_, _) => {
unimplemented!("Creating null Decimal array not yet supported")
Expand All @@ -489,52 +497,58 @@ fn new_null_list_array<OffsetSize: OffsetSizeTrait>(
child_data_type: &DataType,
length: usize,
) -> ArrayRef {
make_array(ArrayData::new(
data_type.clone(),
length,
Some(length),
Some(MutableBuffer::new_null(length).into()),
0,
vec![Buffer::from(
vec![OffsetSize::zero(); length + 1].to_byte_slice(),
)],
vec![ArrayData::new_empty(child_data_type)],
))
make_array(unsafe {
ArrayData::new_unchecked(
data_type.clone(),
length,
Some(length),
Some(MutableBuffer::new_null(length).into()),
0,
vec![Buffer::from(
vec![OffsetSize::zero(); length + 1].to_byte_slice(),
)],
vec![ArrayData::new_empty(child_data_type)],
)
})
}

#[inline]
fn new_null_binary_array<OffsetSize: OffsetSizeTrait>(
data_type: &DataType,
length: usize,
) -> ArrayRef {
make_array(ArrayData::new(
data_type.clone(),
length,
Some(length),
Some(MutableBuffer::new_null(length).into()),
0,
vec![
Buffer::from(vec![OffsetSize::zero(); length + 1].to_byte_slice()),
MutableBuffer::new(0).into(),
],
vec![],
))
make_array(unsafe {
ArrayData::new_unchecked(
data_type.clone(),
length,
Some(length),
Some(MutableBuffer::new_null(length).into()),
0,
vec![
Buffer::from(vec![OffsetSize::zero(); length + 1].to_byte_slice()),
MutableBuffer::new(0).into(),
],
vec![],
)
})
}

#[inline]
fn new_null_sized_array<T: ArrowPrimitiveType>(
data_type: &DataType,
length: usize,
) -> ArrayRef {
make_array(ArrayData::new(
data_type.clone(),
length,
Some(length),
Some(MutableBuffer::new_null(length).into()),
0,
vec![Buffer::from(vec![0u8; length * T::get_byte_width()])],
vec![],
))
make_array(unsafe {
ArrayData::new_unchecked(
data_type.clone(),
length,
Some(length),
Some(MutableBuffer::new_null(length).into()),
0,
vec![Buffer::from(vec![0u8; length * T::get_byte_width()])],
vec![],
)
})
}

/// Creates a new array from two FFI pointers. Used to import arrays from the C Data Interface
Expand Down Expand Up @@ -755,7 +769,8 @@ mod tests {
ArrayData::builder(arr.data_type().clone())
.add_buffer(MutableBuffer::new(0).into())
.null_bit_buffer(MutableBuffer::new_null(0).into())
.build(),
.build()
.unwrap(),
);

// expected size is the size of the PrimitiveArray struct,
Expand Down Expand Up @@ -791,8 +806,10 @@ mod tests {
.child_data(vec![ArrayData::builder(DataType::Int64)
.len(values.len())
.buffers(values.data_ref().buffers().to_vec())
.build()])
.build();
.build()
.unwrap()])
.build()
.unwrap();

let empty_data = ArrayData::new_empty(&DataType::Dictionary(
Box::new(DataType::Int16),
Expand Down
Loading

0 comments on commit 058da05

Please sign in to comment.