Skip to content

Commit 52f7bf1

Browse files
[Variant] Make VariantArray iterable (#8613)
# Which issue does this PR close? - Closes #8609 - Closes #8612 This PR introduces an Iterator over `VariantArray`. Since `VariantArray` does not `impl Array`, we can't make use of `ArrayIter`
1 parent 1b17001 commit 52f7bf1

File tree

1 file changed

+175
-0
lines changed

1 file changed

+175
-0
lines changed

parquet-variant-compute/src/variant_array.rs

Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -420,6 +420,11 @@ impl VariantArray {
420420
pub fn is_valid(&self, index: usize) -> bool {
421421
!self.is_null(index)
422422
}
423+
424+
/// Returns an iterator over the values in this array
425+
pub fn iter(&self) -> VariantArrayIter<'_> {
426+
VariantArrayIter::new(self)
427+
}
423428
}
424429

425430
impl From<VariantArray> for StructArray {
@@ -434,6 +439,89 @@ impl From<VariantArray> for ArrayRef {
434439
}
435440
}
436441

442+
/// An iterator over [`VariantArray`]
443+
///
444+
/// This iterator returns `Option<Option<Variant<'a, 'a>>>` where:
445+
/// - `None` indicates the end of iteration
446+
/// - `Some(None)` indicates a null value at this position
447+
/// - `Some(Some(variant))` indicates a valid variant value
448+
///
449+
/// # Example
450+
///
451+
/// ```
452+
/// # use parquet_variant::Variant;
453+
/// # use parquet_variant_compute::VariantArrayBuilder;
454+
/// let mut builder = VariantArrayBuilder::new(10);
455+
/// builder.append_variant(Variant::from(42));
456+
/// builder.append_null();
457+
/// builder.append_variant(Variant::from("hello"));
458+
/// let array = builder.build();
459+
///
460+
/// let values = array.iter().collect::<Vec<_>>();
461+
/// assert_eq!(values.len(), 3);
462+
/// assert_eq!(values[0], Some(Variant::from(42)));
463+
/// assert_eq!(values[1], None);
464+
/// assert_eq!(values[2], Some(Variant::from("hello")));
465+
/// ```
466+
#[derive(Debug)]
467+
pub struct VariantArrayIter<'a> {
468+
array: &'a VariantArray,
469+
head_i: usize,
470+
tail_i: usize,
471+
}
472+
473+
impl<'a> VariantArrayIter<'a> {
474+
/// Creates a new iterator over the given [`VariantArray`]
475+
pub fn new(array: &'a VariantArray) -> Self {
476+
Self {
477+
array,
478+
head_i: 0,
479+
tail_i: array.len(),
480+
}
481+
}
482+
483+
fn value_opt(&self, i: usize) -> Option<Variant<'a, 'a>> {
484+
self.array.is_valid(i).then(|| self.array.value(i))
485+
}
486+
}
487+
488+
impl<'a> Iterator for VariantArrayIter<'a> {
489+
type Item = Option<Variant<'a, 'a>>;
490+
491+
#[inline]
492+
fn next(&mut self) -> Option<Self::Item> {
493+
if self.head_i == self.tail_i {
494+
return None;
495+
}
496+
497+
let out = self.value_opt(self.head_i);
498+
499+
self.head_i += 1;
500+
501+
Some(out)
502+
}
503+
504+
fn size_hint(&self) -> (usize, Option<usize>) {
505+
let remainder = self.tail_i - self.head_i;
506+
507+
(remainder, Some(remainder))
508+
}
509+
}
510+
511+
impl<'a> DoubleEndedIterator for VariantArrayIter<'a> {
512+
fn next_back(&mut self) -> Option<Self::Item> {
513+
if self.head_i == self.tail_i {
514+
return None;
515+
}
516+
517+
self.tail_i -= 1;
518+
519+
Some(self.value_opt(self.tail_i))
520+
}
521+
}
522+
523+
impl<'a> ExactSizeIterator for VariantArrayIter<'a> {}
524+
437525
/// One shredded field of a partially or perfectly shredded variant. For example, suppose the
438526
/// shredding schema for variant `v` treats it as an object with a single field `a`, where `a` is
439527
/// itself a struct with the single field `b` of type INT. Then the physical layout of the column
@@ -1048,6 +1136,8 @@ fn canonicalize_and_verify_field(field: &Arc<Field>) -> Result<Cow<'_, Arc<Field
10481136

10491137
#[cfg(test)]
10501138
mod test {
1139+
use crate::VariantArrayBuilder;
1140+
10511141
use super::*;
10521142
use arrow::array::{BinaryViewArray, Int32Array};
10531143
use arrow_schema::{Field, Fields};
@@ -1230,4 +1320,89 @@ mod test {
12301320
}
12311321
));
12321322
}
1323+
1324+
#[test]
1325+
fn test_variant_array_iterable() {
1326+
let mut b = VariantArrayBuilder::new(6);
1327+
1328+
b.append_null();
1329+
b.append_variant(Variant::from(1_i8));
1330+
b.append_variant(Variant::Null);
1331+
b.append_variant(Variant::from(2_i32));
1332+
b.append_variant(Variant::from(3_i64));
1333+
b.append_null();
1334+
1335+
let v = b.build();
1336+
1337+
let variants = v.iter().collect::<Vec<_>>();
1338+
1339+
assert_eq!(
1340+
variants,
1341+
vec![
1342+
None,
1343+
Some(Variant::Int8(1)),
1344+
Some(Variant::Null),
1345+
Some(Variant::Int32(2)),
1346+
Some(Variant::Int64(3)),
1347+
None,
1348+
]
1349+
);
1350+
}
1351+
1352+
#[test]
1353+
fn test_variant_array_iter_double_ended() {
1354+
let mut b = VariantArrayBuilder::new(5);
1355+
1356+
b.append_variant(Variant::from(0_i32));
1357+
b.append_null();
1358+
b.append_variant(Variant::from(2_i32));
1359+
b.append_null();
1360+
b.append_variant(Variant::from(4_i32));
1361+
1362+
let array = b.build();
1363+
let mut iter = array.iter();
1364+
1365+
assert_eq!(iter.next(), Some(Some(Variant::from(0_i32))));
1366+
assert_eq!(iter.next(), Some(None));
1367+
1368+
assert_eq!(iter.next_back(), Some(Some(Variant::from(4_i32))));
1369+
assert_eq!(iter.next_back(), Some(None));
1370+
assert_eq!(iter.next_back(), Some(Some(Variant::from(2_i32))));
1371+
1372+
assert_eq!(iter.next_back(), None);
1373+
assert_eq!(iter.next(), None);
1374+
}
1375+
1376+
#[test]
1377+
fn test_variant_array_iter_reverse() {
1378+
let mut b = VariantArrayBuilder::new(5);
1379+
1380+
b.append_variant(Variant::from("a"));
1381+
b.append_null();
1382+
b.append_variant(Variant::from("aaa"));
1383+
b.append_null();
1384+
b.append_variant(Variant::from("aaaaa"));
1385+
1386+
let array = b.build();
1387+
1388+
let result: Vec<_> = array.iter().rev().collect();
1389+
assert_eq!(
1390+
result,
1391+
vec![
1392+
Some(Variant::from("aaaaa")),
1393+
None,
1394+
Some(Variant::from("aaa")),
1395+
None,
1396+
Some(Variant::from("a")),
1397+
]
1398+
);
1399+
}
1400+
1401+
#[test]
1402+
fn test_variant_array_iter_empty() {
1403+
let v = VariantArrayBuilder::new(0).build();
1404+
let mut i = v.iter();
1405+
assert!(i.next().is_none());
1406+
assert!(i.next_back().is_none());
1407+
}
12331408
}

0 commit comments

Comments
 (0)