Skip to content

Commit

Permalink
create BinaryArray directly from byte slice to prevent converting to …
Browse files Browse the repository at this point in the history
…String > &str > &[u8]
  • Loading branch information
nevi-me committed Feb 24, 2019
1 parent b20ea6d commit 2a389a3
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 11 deletions.
50 changes: 50 additions & 0 deletions rust/arrow/src/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -627,6 +627,26 @@ impl<'a> From<Vec<&'a str>> for BinaryArray {
}
}

/// Creates a `BinaryArray` from a vector of byte slices.
///
/// Every slice becomes one (non-null) element of the array. The bytes are copied
/// as-is into a single contiguous values buffer, and an `i32` offsets buffer with
/// `v.len() + 1` entries records where each element starts and ends.
///
/// # Panics
///
/// Panics if the combined length of all slices does not fit in an `i32`, because
/// the offsets could not address the values buffer correctly.
impl<'a> From<Vec<&'a [u8]>> for BinaryArray {
    fn from(v: Vec<&'a [u8]>) -> Self {
        // Offsets are `i32`, so validate the total size once up front instead of
        // letting `usize as i32` silently truncate and produce corrupt offsets.
        let total_bytes = v
            .iter()
            .try_fold(0usize, |acc, s| acc.checked_add(s.len()))
            .filter(|&n| n <= i32::max_value() as usize)
            .expect("BinaryArray values exceed the maximum length addressable by i32 offsets");

        // Both buffer sizes are known in advance: one offset per element plus the
        // leading zero, and exactly `total_bytes` of value data.
        let mut offsets: Vec<i32> = Vec::with_capacity(v.len() + 1);
        let mut values: Vec<u8> = Vec::with_capacity(total_bytes);
        let mut length_so_far: i32 = 0;
        offsets.push(length_so_far);
        for s in &v {
            // Cannot truncate or overflow: the running total is bounded by
            // `total_bytes`, which was checked against `i32::max_value()` above.
            length_so_far += s.len() as i32;
            offsets.push(length_so_far);
            values.extend_from_slice(s);
        }
        let array_data = ArrayData::builder(DataType::Utf8)
            .len(v.len())
            .add_buffer(Buffer::from(offsets.to_byte_slice()))
            .add_buffer(Buffer::from(&values[..]))
            .build();
        BinaryArray::from(array_data)
    }
}

/// Creates a `BinaryArray` from `List<u8>` array
impl From<ListArray> for BinaryArray {
fn from(v: ListArray) -> Self {
Expand Down Expand Up @@ -1155,6 +1175,36 @@ mod tests {
}
}

#[test]
fn test_binary_array_from_u8_slice() {
    // Array data: ["hello", "", "parquet"], expressed as raw byte slices.
    let hello: &[u8] = b"hello";
    let empty: &[u8] = b"";
    let parquet: &[u8] = b"parquet";

    let binary_array = BinaryArray::from(vec![hello, empty, parquet]);

    // Three elements, none of them null.
    assert_eq!(3, binary_array.len());
    assert_eq!(0, binary_array.null_count());

    // Each element must round-trip both as raw bytes and as a string.
    assert_eq!(hello, binary_array.value(0));
    assert_eq!("hello", binary_array.get_string(0));
    assert_eq!(empty, binary_array.value(1));
    assert_eq!("", binary_array.get_string(1));
    assert_eq!(parquet, binary_array.value(2));
    assert_eq!("parquet", binary_array.get_string(2));

    // The last element starts after "hello" (5 bytes) and the empty element.
    assert_eq!(5, binary_array.value_offset(2));
    assert_eq!(7, binary_array.value_length(2));

    (0..3).for_each(|idx| {
        assert!(binary_array.is_valid(idx));
        assert!(!binary_array.is_null(idx));
    });
}

#[test]
#[should_panic(
expected = "BinaryArray can only be created from List<u8> arrays, mismatched \
Expand Down
17 changes: 6 additions & 11 deletions rust/arrow/src/compute/array_ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -236,17 +236,14 @@ pub fn filter(array: &Array, filter: &BooleanArray) -> Result<ArrayRef> {
DataType::Float64 => filter_array!(array, filter, Float64Array),
DataType::Boolean => filter_array!(array, filter, BooleanArray),
DataType::Utf8 => {
//TODO: this is inefficient and we should improve the Arrow impl to help make
// this more concise
let b = array.as_any().downcast_ref::<BinaryArray>().unwrap();
let mut values: Vec<String> = Vec::with_capacity(b.len());
let mut values: Vec<&[u8]> = Vec::with_capacity(b.len());
for i in 0..b.len() {
if filter.value(i) {
values.push(b.get_string(i));
values.push(b.value(i));
}
}
let tmp: Vec<&str> = values.iter().map(|s| s.as_str()).collect();
Ok(Arc::new(BinaryArray::from(tmp)))
Ok(Arc::new(BinaryArray::from(values)))
}
other => Err(ArrowError::ComputeError(format!(
"filter not supported for {:?}",
Expand Down Expand Up @@ -288,14 +285,12 @@ pub fn limit(array: &Array, num_rows_to_read: usize) -> Result<ArrayRef> {
DataType::Float64 => limit_array!(array, num_rows_to_read, Float64Array),
DataType::Boolean => limit_array!(array, num_rows_to_read, BooleanArray),
DataType::Utf8 => {
//TODO: this is inefficient and we should improve the Arrow impl to help make this more concise
let b = array.as_any().downcast_ref::<BinaryArray>().unwrap();
let mut values: Vec<String> = Vec::with_capacity(num_rows_to_read as usize);
let mut values: Vec<&[u8]> = Vec::with_capacity(num_rows_to_read as usize);
for i in 0..num_rows_to_read {
values.push(b.get_string(i));
values.push(b.value(i));
}
let tmp: Vec<&str> = values.iter().map(|s| s.as_str()).collect();
Ok(Arc::new(BinaryArray::from(tmp)))
Ok(Arc::new(BinaryArray::from(values)))
}
other => Err(ArrowError::ComputeError(format!(
"limit not supported for {:?}",
Expand Down

0 comments on commit 2a389a3

Please sign in to comment.