Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao committed May 26, 2022
1 parent ab0bf6a commit cddbbad
Show file tree
Hide file tree
Showing 12 changed files with 704 additions and 702 deletions.
18 changes: 8 additions & 10 deletions src/io/parquet/write/binary/nested.rs
Original file line number Diff line number Diff line change
@@ -1,33 +1,31 @@
 use parquet2::schema::types::PrimitiveType;
 use parquet2::{encoding::Encoding, page::DataPage};

-use super::super::{levels, utils, WriteOptions};
+use super::super::{nested, utils, WriteOptions};
 use super::basic::{build_statistics, encode_plain};
 use crate::io::parquet::read::schema::is_nullable;
+use crate::io::parquet::write::Nested;
 use crate::{
     array::{Array, BinaryArray, Offset},
     error::Result,
 };

-pub fn array_to_page<O, OO>(
+pub fn array_to_page<O>(
     array: &BinaryArray<O>,
     options: WriteOptions,
     type_: PrimitiveType,
-    nested: levels::NestedInfo<OO>,
+    nested: Vec<Nested>,
 ) -> Result<DataPage>
 where
-    OO: Offset,
     O: Offset,
 {
     let is_optional = is_nullable(&type_.field_info);

-    let validity = array.validity();
-
     let mut buffer = vec![];
-    levels::write_rep_levels(&mut buffer, &nested, options.version)?;
+    nested::write_rep_levels(&mut buffer, &nested, options.version)?;
     let repetition_levels_byte_length = buffer.len();

-    levels::write_def_levels(&mut buffer, &nested, validity, is_optional, options.version)?;
+    nested::write_def_levels(&mut buffer, &nested, options.version)?;
     let definition_levels_byte_length = buffer.len() - repetition_levels_byte_length;

     encode_plain(array, is_optional, &mut buffer);
Expand All @@ -40,8 +38,8 @@ where

     utils::build_plain_page(
         buffer,
-        levels::num_values(nested.offsets()),
-        nested.offsets().len().saturating_sub(1),
+        nested::num_values(&nested),
+        nested[0].len(),
         array.null_count(),
         repetition_levels_byte_length,
         definition_levels_byte_length,
Expand Down
24 changes: 10 additions & 14 deletions src/io/parquet/write/boolean/nested.rs
Original file line number Diff line number Diff line change
@@ -1,32 +1,28 @@
 use parquet2::schema::types::PrimitiveType;
 use parquet2::{encoding::Encoding, page::DataPage};

-use super::super::{levels, utils, WriteOptions};
+use super::super::{nested, utils, WriteOptions};
 use super::basic::{build_statistics, encode_plain};
 use crate::io::parquet::read::schema::is_nullable;
+use crate::io::parquet::write::Nested;
 use crate::{
-    array::{Array, BooleanArray, Offset},
+    array::{Array, BooleanArray},
     error::Result,
 };

-pub fn array_to_page<O>(
+pub fn array_to_page(
     array: &BooleanArray,
     options: WriteOptions,
     type_: PrimitiveType,
-    nested: levels::NestedInfo<O>,
-) -> Result<DataPage>
-where
-    O: Offset,
-{
+    nested: Vec<Nested>,
+) -> Result<DataPage> {
     let is_optional = is_nullable(&type_.field_info);

-    let validity = array.validity();
-
     let mut buffer = vec![];
-    levels::write_rep_levels(&mut buffer, &nested, options.version)?;
+    nested::write_rep_levels(&mut buffer, &nested, options.version)?;
     let repetition_levels_byte_length = buffer.len();

-    levels::write_def_levels(&mut buffer, &nested, validity, is_optional, options.version)?;
+    nested::write_def_levels(&mut buffer, &nested, options.version)?;
     let definition_levels_byte_length = buffer.len() - repetition_levels_byte_length;

     encode_plain(array, is_optional, &mut buffer)?;
Expand All @@ -39,8 +35,8 @@ where

     utils::build_plain_page(
         buffer,
-        levels::num_values(nested.offsets()),
-        nested.offsets().len().saturating_sub(1),
+        nested::num_values(&nested),
+        nested[0].len(),
         array.null_count(),
         repetition_levels_byte_length,
         definition_levels_byte_length,
Expand Down
Loading

0 comments on commit cddbbad

Please sign in to comment.