Skip to content

Commit

Permalink
Fix undefined behavor in GenericStringArray::from_iter_values (#1145)
Browse files Browse the repository at this point in the history
* Fix undefined behavor in GenericStringArray::from_iter_values

* Cleanup code and tests

* clippy

* Fix test
  • Loading branch information
alamb authored Jan 10, 2022
1 parent 719096b commit 35b04ca
Show file tree
Hide file tree
Showing 2 changed files with 120 additions and 1 deletion.
73 changes: 72 additions & 1 deletion arrow/src/array/array_string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -187,8 +187,13 @@ impl<OffsetSize: StringOffsetSizeTrait> GenericStringArray<OffsetSize> {
offsets.push(length_so_far);
values.extend_from_slice(s.as_bytes());
}

// iterator size hint may not be correct so compute the actual number of offsets
assert!(!offsets.is_empty()); // wrote at least one
let actual_len = (offsets.len() / std::mem::size_of::<OffsetSize>()) - 1;

let array_data = ArrayData::builder(OffsetSize::DATA_TYPE)
.len(data_len)
.len(actual_len)
.add_buffer(offsets.into())
.add_buffer(values.into());
let array_data = unsafe { array_data.build_unchecked() };
Expand Down Expand Up @@ -572,4 +577,70 @@ mod tests {
.validate_full()
.expect("All null array has valid array data");
}

#[cfg(feature = "test_utils")]
#[test]
fn bad_size_collect_string() {
use crate::util::test_util::BadIterator;
let data = vec![Some("foo"), None, Some("bar")];
let expected: StringArray = data.clone().into_iter().collect();

// Iterator reports too many items
let arr: StringArray = BadIterator::new(3, 10, data.clone()).collect();
assert_eq!(expected, arr);

// Iterator reports too few items
let arr: StringArray = BadIterator::new(3, 1, data.clone()).collect();
assert_eq!(expected, arr);
}

#[cfg(feature = "test_utils")]
#[test]
fn bad_size_collect_large_string() {
use crate::util::test_util::BadIterator;
let data = vec![Some("foo"), None, Some("bar")];
let expected: LargeStringArray = data.clone().into_iter().collect();

// Iterator reports too many items
let arr: LargeStringArray = BadIterator::new(3, 10, data.clone()).collect();
assert_eq!(expected, arr);

// Iterator reports too few items
let arr: LargeStringArray = BadIterator::new(3, 1, data.clone()).collect();
assert_eq!(expected, arr);
}

#[cfg(feature = "test_utils")]
#[test]
fn bad_size_iter_values_string() {
use crate::util::test_util::BadIterator;
let data = vec!["foo", "bar", "baz"];
let expected: StringArray = data.clone().into_iter().map(Some).collect();

// Iterator reports too many items
let arr = StringArray::from_iter_values(BadIterator::new(3, 10, data.clone()));
assert_eq!(expected, arr);

// Iterator reports too few items
let arr = StringArray::from_iter_values(BadIterator::new(3, 1, data.clone()));
assert_eq!(expected, arr);
}

#[cfg(feature = "test_utils")]
#[test]
fn bad_size_iter_values_large_string() {
use crate::util::test_util::BadIterator;
let data = vec!["foo", "bar", "baz"];
let expected: LargeStringArray = data.clone().into_iter().map(Some).collect();

// Iterator reports too many items
let arr =
LargeStringArray::from_iter_values(BadIterator::new(3, 10, data.clone()));
assert_eq!(expected, arr);

// Iterator reports too few items
let arr =
LargeStringArray::from_iter_values(BadIterator::new(3, 1, data.clone()));
assert_eq!(expected, arr);
}
}
48 changes: 48 additions & 0 deletions arrow/src/util/test_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,54 @@ fn get_data_dir(udf_env: &str, submodule_data: &str) -> Result<PathBuf, Box<dyn
}
}

/// An iterator that is untruthful about its actual length
#[derive(Debug, Clone)]
pub struct BadIterator<T> {
/// where the iterator currently is
cur: usize,
/// How many items will this iterator *actually* make
limit: usize,
/// How many items this iterator claims it will make
claimed: usize,
/// The items to return. If there are fewer items than `limit`
/// they will be repeated
pub items: Vec<T>,
}

impl<T> BadIterator<T> {
/// Create a new iterator for <limit> items, but that reports to
/// produce <claimed> items. Must provide at least 1 item.
pub fn new(limit: usize, claimed: usize, items: Vec<T>) -> Self {
assert!(!items.is_empty());
Self {
cur: 0,
limit,
claimed,
items,
}
}
}

impl<T: Clone> Iterator for BadIterator<T> {
type Item = T;

fn next(&mut self) -> Option<Self::Item> {
if self.cur < self.limit {
let next_item_idx = self.cur % self.items.len();
let next_item = self.items[next_item_idx].clone();
self.cur += 1;
Some(next_item)
} else {
None
}
}

/// report whatever the iterator says to
fn size_hint(&self) -> (usize, Option<usize>) {
(0, Some(self.claimed as usize))
}
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down

0 comments on commit 35b04ca

Please sign in to comment.