Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Nested boolean
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao committed Jan 28, 2022
1 parent 9ea3406 commit 48b6fb9
Show file tree
Hide file tree
Showing 8 changed files with 631 additions and 203 deletions.
4 changes: 2 additions & 2 deletions src/io/parquet/read/binary/nested.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,6 @@ fn read<O: Offset>(

match (rep_level_encoding.0, def_level_encoding.0) {
(Encoding::Rle, Encoding::Rle) => {
let rep_levels =
HybridRleDecoder::new(rep_levels, get_bit_width(rep_level_encoding.1), additional);
if is_nullable {
let def_levels = HybridRleDecoder::new(
def_levels,
Expand All @@ -79,6 +77,8 @@ fn read<O: Offset>(
read_plain_required(values_buffer, additional, values)
}

let rep_levels =
HybridRleDecoder::new(rep_levels, get_bit_width(rep_level_encoding.1), additional);
let def_levels =
HybridRleDecoder::new(def_levels, get_bit_width(def_level_encoding.1), additional);

Expand Down
60 changes: 24 additions & 36 deletions src/io/parquet/read/boolean/mod.rs
Original file line number Diff line number Diff line change
@@ -1,46 +1,17 @@
mod basic;
mod nested;

use std::sync::Arc;

use crate::{
array::{Array, BooleanArray},
bitmap::MutableBitmap,
datatypes::DataType,
array::Array,
datatypes::{DataType, Field},
error::Result,
};

use parquet2::{metadata::ColumnDescriptor, page::DataPage};

mod basic;
mod nested;

use self::basic::BooleanArrayIterator;

use super::{nested_utils::Nested, DataPages};

/// Builds a [`BooleanArray`] from a single [`DataPage`] of a nested column.
///
/// Two bitmaps (values and validity) are allocated with a capacity equal to
/// the page's `num_values()`, filled by `nested::extend_from_page` (which also
/// receives `nested` mutably), and then assembled into the array together
/// with `data_type`.
///
/// # Errors
///
/// Propagates any error returned by `nested::extend_from_page`.
fn page_to_array_nested(
    page: &DataPage,
    descriptor: &ColumnDescriptor,
    data_type: DataType,
    nested: &mut Vec<Box<dyn Nested>>,
    is_nullable: bool,
) -> Result<BooleanArray> {
    let num_values = page.num_values() as usize;
    let (mut value_bits, mut validity_bits) = (
        MutableBitmap::with_capacity(num_values),
        MutableBitmap::with_capacity(num_values),
    );

    nested::extend_from_page(
        page,
        descriptor,
        is_nullable,
        nested,
        &mut value_bits,
        &mut validity_bits,
    )?;

    let array = BooleanArray::from_data(data_type, value_bits.into(), validity_bits.into());
    Ok(array)
}
use self::nested::ArrayIterator;
use super::{nested_utils::NestedState, DataPages};

/// Converts [`DataPages`] to an [`Iterator`] of [`Array`]
pub fn iter_to_arrays<'a, I: 'a>(
Expand All @@ -57,3 +28,20 @@ where
.map(|x| x.map(|x| Arc::new(x) as Arc<dyn Array>)),
)
}

/// Converts [`DataPages`] of a nested boolean column into a boxed [`Iterator`].
///
/// Each item is a [`Result`] holding the [`NestedState`] produced by the
/// underlying [`ArrayIterator`] paired with its array, type-erased as
/// `Arc<dyn Array>`. `field` and `chunk_size` are forwarded unchanged to
/// [`ArrayIterator::new`].
pub fn iter_to_arrays_nested<'a, I: 'a>(
    iter: I,
    field: Field,
    chunk_size: usize,
) -> Box<dyn Iterator<Item = Result<(NestedState, Arc<dyn Array>)>> + 'a>
where
    I: DataPages,
{
    let arrays = ArrayIterator::new(iter, field, chunk_size);
    // Only the array half is touched: it is wrapped in an `Arc` and erased to
    // a trait object; the nested state and any error pass through untouched.
    let erased = arrays.map(|item| match item {
        Ok((nested, array)) => Ok((nested, Arc::new(array) as Arc<dyn Array>)),
        Err(error) => Err(error),
    });
    Box::new(erased)
}
Loading

0 comments on commit 48b6fb9

Please sign in to comment.